Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
3e1e482b
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3e1e482b
编写于
9月 26, 2022
作者:
C
cifar10
提交者:
GitHub
9月 26, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MLU] fluid: add mluop (#46429)
上级
b0ec8efb
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
434 addition
and
47 deletion
+434
-47
cmake/neuware.cmake
cmake/neuware.cmake
+3
-1
paddle/fluid/operators/mlu/mlu_baseop.cc
paddle/fluid/operators/mlu/mlu_baseop.cc
+209
-11
paddle/fluid/operators/mlu/mlu_baseop.h
paddle/fluid/operators/mlu/mlu_baseop.h
+121
-0
paddle/fluid/platform/device/mlu/device_context.cc
paddle/fluid/platform/device/mlu/device_context.cc
+10
-1
paddle/fluid/platform/device/mlu/device_context.h
paddle/fluid/platform/device/mlu/device_context.h
+19
-0
paddle/fluid/platform/device/mlu/enforce.h
paddle/fluid/platform/device/mlu/enforce.h
+10
-0
paddle/fluid/platform/device/mlu/mlu_info.cc
paddle/fluid/platform/device/mlu/mlu_info.cc
+7
-0
paddle/fluid/platform/device/mlu/mlu_info.h
paddle/fluid/platform/device/mlu/mlu_info.h
+7
-1
python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py
...dle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py
+19
-20
python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py
...addle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py
+5
-0
tools/dockerfile/Dockerfile.mlu
tools/dockerfile/Dockerfile.mlu
+24
-13
未找到文件。
cmake/neuware.cmake
浏览文件 @
3e1e482b
...
@@ -15,12 +15,14 @@ set(NEUWARE_LIB_DIR ${NEUWARE_HOME}/lib64)
...
@@ -15,12 +15,14 @@ set(NEUWARE_LIB_DIR ${NEUWARE_HOME}/lib64)
include_directories
(
${
NEUWARE_INCLUDE_DIR
}
)
include_directories
(
${
NEUWARE_INCLUDE_DIR
}
)
set
(
CNNL_LIB
${
NEUWARE_LIB_DIR
}
/libcnnl.so
)
set
(
CNNL_LIB
${
NEUWARE_LIB_DIR
}
/libcnnl.so
)
set
(
MLUOP_LIB
${
NEUWARE_LIB_DIR
}
/libmluops.so
)
set
(
CNRT_LIB
${
NEUWARE_LIB_DIR
}
/libcnrt.so
)
set
(
CNRT_LIB
${
NEUWARE_LIB_DIR
}
/libcnrt.so
)
set
(
CNDRV_LIB
${
NEUWARE_LIB_DIR
}
/libcndrv.so
)
set
(
CNDRV_LIB
${
NEUWARE_LIB_DIR
}
/libcndrv.so
)
set
(
CNPAPI_LIB
${
NEUWARE_LIB_DIR
}
/libcnpapi.so
)
set
(
CNPAPI_LIB
${
NEUWARE_LIB_DIR
}
/libcnpapi.so
)
generate_dummy_static_lib
(
LIB_NAME
"neuware_lib"
GENERATOR
"neuware.cmake"
)
generate_dummy_static_lib
(
LIB_NAME
"neuware_lib"
GENERATOR
"neuware.cmake"
)
set
(
NEUWARE_LIB_DEPS
${
CNNL_LIB
}
${
CNRT_LIB
}
${
CNDRV_LIB
}
${
CNPAPI_LIB
}
)
set
(
NEUWARE_LIB_DEPS
${
CNNL_LIB
}
${
MLUOP_LIB
}
${
CNRT_LIB
}
${
CNDRV_LIB
}
${
CNPAPI_LIB
}
)
if
(
WITH_CNCL
)
if
(
WITH_CNCL
)
message
(
STATUS
"Compile with CNCL!"
)
message
(
STATUS
"Compile with CNCL!"
)
...
...
paddle/fluid/operators/mlu/mlu_baseop.cc
浏览文件 @
3e1e482b
...
@@ -256,6 +256,186 @@ MLUCnnlTensorDesc::~MLUCnnlTensorDesc() {
...
@@ -256,6 +256,186 @@ MLUCnnlTensorDesc::~MLUCnnlTensorDesc() {
}
}
}
}
class
MLUOpTensorDescPool
{
public:
mluOpTensorDescriptor_t
Pop
()
{
mluOpTensorDescriptor_t
raw_desc
;
if
(
q_
.
try_dequeue
(
raw_desc
))
{
return
raw_desc
;
}
else
{
mluOpCreateTensorDescriptor
(
&
raw_desc
);
return
raw_desc
;
}
}
void
Recycle
(
mluOpTensorDescriptor_t
desc
)
{
mluOpResetTensorDescriptor
(
desc
);
q_
.
enqueue
(
desc
);
}
~
MLUOpTensorDescPool
()
{
auto
size
=
q_
.
size_approx
();
if
(
size
>
0
)
{
std
::
vector
<
mluOpTensorDescriptor_t
>
vec
(
size
);
q_
.
try_dequeue_bulk
(
vec
.
data
(),
size
);
for
(
auto
desc
:
vec
)
{
mluOpDestroyTensorDescriptor
(
desc
);
}
}
}
private:
moodycamel
::
ConcurrentQueue
<
mluOpTensorDescriptor_t
>
q_
;
};
// File-local descriptor pool used by MLUOpTensorDesc in this translation
// unit: the constructors Pop() descriptors from it, and the destructor and
// move assignment Recycle() them back into it.
static
MLUOpTensorDescPool
g_mluop_tensor_desc_pool
;
// Move assignment: returns the descriptor currently held (if any) to the pool
// and takes over the descriptor owned by `rhs`, leaving `rhs` empty.
MLUOpTensorDesc& MLUOpTensorDesc::operator=(MLUOpTensorDesc&& rhs) {
  // Self-move guard: without it the descriptor would be recycled and then
  // nulled out, silently leaving *this without a descriptor.
  if (this == &rhs) {
    return *this;
  }
  if (raw_tensor_desc) {
    g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
  }
  raw_tensor_desc = rhs.raw_tensor_desc;
  rhs.raw_tensor_desc = nullptr;
  return *this;
}
// Builds a descriptor for a tensor with `tensor_dim` dimensions whose extents
// are given by `dim_sizes`, using the MLUOP_LAYOUT_ARRAY layout.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype) {
  const mluOpTensorLayout_t array_layout = MLUOP_LAYOUT_ARRAY;
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
      raw_tensor_desc, array_layout, tensor_dtype, tensor_dim, dim_sizes));
}
// Builds a descriptor for a tensor with `tensor_dim` dimensions whose extents
// are given by `dim_sizes`, using the caller-supplied `layout`.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 const mluOpTensorLayout_t layout) {
  // Take a descriptor from the pool first, then configure it in place.
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
      this->raw_tensor_desc, layout, tensor_dtype, tensor_dim, dim_sizes));
}
// Same as the (tensor_dim, dim_sizes, tensor_dtype) constructor, and then
// additionally stores `position` on the descriptor via
// mluOpSetTensorDescriptorPosition.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 int position)
    : MLUOpTensorDesc(tensor_dim, dim_sizes, tensor_dtype) {
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptorPosition(this->raw_tensor_desc, position));
}
// Builds a MLUOP_LAYOUT_ARRAY descriptor from 64-bit dimension sizes.
//
// mluOpSetTensorDescriptor is called with int extents, so every entry of
// `dim_sizes` is first converted with CheckedNarrowing<int64_t, int>.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype) {
  std::vector<int> dim_sizes_int32(tensor_dim);
  // Raw pointers are valid input iterators; constructing a
  // std::vector<int64_t>::const_iterator from a pointer is not portable
  // across standard library implementations.
  std::transform(dim_sizes,
                 dim_sizes + tensor_dim,
                 dim_sizes_int32.begin(),
                 &CheckedNarrowing<int64_t, int>);
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptor(raw_tensor_desc,
                               MLUOP_LAYOUT_ARRAY,
                               tensor_dtype,
                               tensor_dim,
                               dim_sizes_int32.data()));
}
// Builds a descriptor with the caller-supplied `layout` from 64-bit dimension
// sizes.
//
// mluOpSetTensorDescriptor is called with int extents, so every entry of
// `dim_sizes` is first converted with CheckedNarrowing<int64_t, int>.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 const mluOpTensorLayout_t layout) {
  std::vector<int> dim_sizes_int32(tensor_dim);
  // Raw pointers are valid input iterators; constructing a
  // std::vector<int64_t>::const_iterator from a pointer is not portable
  // across standard library implementations.
  std::transform(dim_sizes,
                 dim_sizes + tensor_dim,
                 dim_sizes_int32.begin(),
                 &CheckedNarrowing<int64_t, int>);
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(raw_tensor_desc,
                                                      layout,
                                                      tensor_dtype,
                                                      tensor_dim,
                                                      dim_sizes_int32.data()));
}
// Builds a MLUOP_LAYOUT_ARRAY descriptor from 64-bit dimension sizes and then
// stores `position` on it via mluOpSetTensorDescriptorPosition.
//
// mluOpSetTensorDescriptor is called with int extents, so every entry of
// `dim_sizes` is first converted with CheckedNarrowing<int64_t, int>.
MLUOpTensorDesc::MLUOpTensorDesc(const int tensor_dim,
                                 const int64_t dim_sizes[],
                                 const mluOpDataType_t tensor_dtype,
                                 int position) {
  std::vector<int> dim_sizes_int32(tensor_dim);
  // Raw pointers are valid input iterators; constructing a
  // std::vector<int64_t>::const_iterator from a pointer is not portable
  // across standard library implementations.
  std::transform(dim_sizes,
                 dim_sizes + tensor_dim,
                 dim_sizes_int32.begin(),
                 &CheckedNarrowing<int64_t, int>);
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptor(raw_tensor_desc,
                               MLUOP_LAYOUT_ARRAY,
                               tensor_dtype,
                               tensor_dim,
                               dim_sizes_int32.data()));
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptorPosition(raw_tensor_desc, position));
}
// Builds a descriptor that mirrors the shape of `tensor`, with the given
// `layout` and `tensor_dtype`. A tensor with zero dimensions is described as
// a one-dimensional tensor of extent 1.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 const mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype) {
  auto shape = phi::vectorize<int>(tensor.dims());
  int rank = shape.size();
  raw_tensor_desc = g_mluop_tensor_desc_pool.Pop();
  if (rank != 0) {
    std::vector<int> shape_as_int(shape.begin(), shape.end());
    PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
        raw_tensor_desc, layout, tensor_dtype, rank, shape_as_int.data()));
  } else {
    // Zero-dimensional input: describe it as shape {1}.
    int unit_shape[1] = {1};
    PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptor(
        raw_tensor_desc, layout, tensor_dtype, 1, unit_shape));
  }
}
// Convenience constructor: describes `tensor` with the MLUOP_LAYOUT_ARRAY
// layout and the mluOp data type derived from tensor.dtype().
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor)
    : MLUOpTensorDesc(
          tensor, MLUOP_LAYOUT_ARRAY, ToMluOpDataType(tensor.dtype())) {
}
// Describes `tensor` via the (tensor, layout, tensor_dtype) constructor and
// then stores `position` on the descriptor.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype,
                                 int position)
    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
  PADDLE_ENFORCE_MLU_SUCCESS(
      mluOpSetTensorDescriptorPosition(this->raw_tensor_desc, position));
}
// Describes `tensor` via the (tensor, layout, tensor_dtype) constructor and
// then stores both `position` and `scale` on the descriptor.
MLUOpTensorDesc::MLUOpTensorDesc(const Tensor& tensor,
                                 mluOpTensorLayout_t layout,
                                 const mluOpDataType_t tensor_dtype,
                                 int position,
                                 float scale)
    : MLUOpTensorDesc(tensor, layout, tensor_dtype) {
  PADDLE_ENFORCE_MLU_SUCCESS(mluOpSetTensorDescriptorPositionAndScale(
      this->raw_tensor_desc, position, scale));
}
// Returns the held descriptor to the pool. Nothing happens when the
// descriptor is null (for example after being moved from).
MLUOpTensorDesc::~MLUOpTensorDesc() {
  if (!raw_tensor_desc) {
    return;
  }
  g_mluop_tensor_desc_pool.Recycle(raw_tensor_desc);
}
MLUCnnlActivationDesc
::
MLUCnnlActivationDesc
(
MLUCnnlActivationDesc
::
MLUCnnlActivationDesc
(
const
cnnlActivationMode_t
act_mode
,
const
float
ceof
)
{
const
cnnlActivationMode_t
act_mode
,
const
float
ceof
)
{
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlCreateActivationDescriptor
(
&
active_desc_
));
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlCreateActivationDescriptor
(
&
active_desc_
));
...
@@ -1563,17 +1743,35 @@ MLURNNDesc::~MLURNNDesc() {
...
@@ -1563,17 +1743,35 @@ MLURNNDesc::~MLURNNDesc() {
void
*
indices_out
)
{
void
*
indices_out
)
{
cnnlHandle_t
handle
=
GetHandleFromCTX
(
ctx
);
cnnlHandle_t
handle
=
GetHandleFromCTX
(
ctx
);
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlTopKTensor
(
handle
,
size_t
workspace_size
;
input_desc
,
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlGetTopKTensorWorkspaceSize
(
handle
,
input
,
input_desc
,
k
,
k
,
dim
,
dim
,
largest
,
largest
,
sorted
,
values_output_desc
,
values_output_desc
,
indices_output_desc
,
values_out
,
&
workspace_size
));
indices_output_desc
,
indices_out
));
auto
&
dev_ctx
=
GetDevCtxFromCTX
(
ctx
);
Tensor
workspace
=
ctx
.
AllocateTmpTensor
<
int8_t
,
MLUDeviceContext
>
(
{
static_cast
<
int64_t
>
(
workspace_size
)},
dev_ctx
);
void
*
workspace_ptr
=
workspace
.
mutable_data
(
ctx
.
GetPlace
());
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlTopKTensor_v3
(
handle
,
input_desc
,
input
,
k
,
dim
,
largest
,
sorted
,
false
/*lower_index_first*/
,
workspace_ptr
,
workspace_size
,
values_output_desc
,
values_out
,
indices_output_desc
,
indices_out
));
}
}
/* static */
void
MLUCnnl
::
StridedSlice
(
/* static */
void
MLUCnnl
::
StridedSlice
(
...
...
paddle/fluid/operators/mlu/mlu_baseop.h
浏览文件 @
3e1e482b
...
@@ -16,6 +16,7 @@ limitations under the License. */
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include <cn_api.h>
#include <cn_api.h>
#include <cnnl.h>
#include <cnnl.h>
#include <concurrentqueue.h>
#include <concurrentqueue.h>
#include <mlu_op.h>
#include <string>
#include <string>
#include <vector>
#include <vector>
...
@@ -138,6 +139,54 @@ inline cnnlDataType_t ToCnnlDataType() {
...
@@ -138,6 +139,54 @@ inline cnnlDataType_t ToCnnlDataType() {
return
ToCnnlDataType
(
type
);
return
ToCnnlDataType
(
type
);
}
}
// Maps a Paddle DataType to the corresponding mluOp data type.
// Any dtype without an explicit case below maps to MLUOP_DTYPE_FLOAT.
inline mluOpDataType_t ToMluOpDataType(
    const paddle::experimental::DataType& dtype) {
  switch (dtype) {
    case DataType::FLOAT16:
      return MLUOP_DTYPE_HALF;
    case DataType::FLOAT32:
      return MLUOP_DTYPE_FLOAT;
    case DataType::FLOAT64:
      return MLUOP_DTYPE_DOUBLE;
    case DataType::INT8:
      return MLUOP_DTYPE_INT8;
    case DataType::INT16:
      return MLUOP_DTYPE_INT16;
    case DataType::INT32:
      return MLUOP_DTYPE_INT32;
    case DataType::INT64:
      return MLUOP_DTYPE_INT64;
    case DataType::BOOL:
      return MLUOP_DTYPE_BOOL;
    case DataType::UINT8:
      return MLUOP_DTYPE_UINT8;
    default:
      return MLUOP_DTYPE_FLOAT;
  }
}
// Maps a framework proto VarType to an mluOp data type by first translating
// it with framework::TransToPhiDataType and then reusing the DataType
// overload.
inline mluOpDataType_t ToMluOpDataType(
    const paddle::framework::proto::VarType::Type& type) {
  const auto phi_dtype = framework::TransToPhiDataType(type);
  return ToMluOpDataType(phi_dtype);
}
// Compile-time-typed variant: resolves T through framework::ToDataType and
// forwards the result to the matching ToMluOpDataType overload.
template <typename T>
inline mluOpDataType_t ToMluOpDataType() {
  return ToMluOpDataType(framework::ToDataType(std::type_index(typeid(T))));
}
// Converts (via narrowing) a type T value to a type U, and checks that the
// Converts (via narrowing) a type T value to a type U, and checks that the
// value has no value change due to the conversion.
// value has no value change due to the conversion.
template
<
typename
WideT
,
typename
NarrowT
>
template
<
typename
WideT
,
typename
NarrowT
>
...
@@ -152,6 +201,10 @@ inline static cnnlHandle_t GetHandleFromCTX(const ExecutionContext& ctx) {
...
@@ -152,6 +201,10 @@ inline static cnnlHandle_t GetHandleFromCTX(const ExecutionContext& ctx) {
return
ctx
.
template
device_context
<
MLUDeviceContext
>().
cnnl_handle
();
return
ctx
.
template
device_context
<
MLUDeviceContext
>().
cnnl_handle
();
}
}
// Returns the mluOp handle of the MLUDeviceContext attached to `ctx`.
inline static mluOpHandle_t GetMLUOpHandleFromCTX(const ExecutionContext& ctx) {
  const auto& mlu_dev_ctx = ctx.template device_context<MLUDeviceContext>();
  return mlu_dev_ctx.mluOp_handle();
}
inline
static
const
MLUDeviceContext
&
GetDevCtxFromCTX
(
inline
static
const
MLUDeviceContext
&
GetDevCtxFromCTX
(
const
ExecutionContext
&
ctx
)
{
const
ExecutionContext
&
ctx
)
{
return
ctx
.
template
device_context
<
MLUDeviceContext
>();
return
ctx
.
template
device_context
<
MLUDeviceContext
>();
...
@@ -281,6 +334,74 @@ class MLUCnnlTensorDesc {
...
@@ -281,6 +334,74 @@ class MLUCnnlTensorDesc {
cnnlTensorDescriptor_t
raw_tensor_desc
=
nullptr
;
cnnlTensorDescriptor_t
raw_tensor_desc
=
nullptr
;
};
};
class
MLUOpTensorDesc
{
public:
MLUOpTensorDesc
()
{}
// SE_DISALLOW_COPY_AND_ASSIGN
MLUOpTensorDesc
(
const
MLUOpTensorDesc
&
desc
)
=
delete
;
MLUOpTensorDesc
&
operator
=
(
const
MLUOpTensorDesc
&
)
=
delete
;
MLUOpTensorDesc
(
MLUOpTensorDesc
&&
rhs
)
:
raw_tensor_desc
(
rhs
.
raw_tensor_desc
)
{
rhs
.
raw_tensor_desc
=
nullptr
;
}
MLUOpTensorDesc
&
operator
=
(
MLUOpTensorDesc
&&
rhs
);
MLUOpTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
const
mluOpDataType_t
tensor_dtype
);
MLUOpTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
const
mluOpDataType_t
tensor_dtype
,
const
mluOpTensorLayout_t
layout
);
MLUOpTensorDesc
(
const
int
tensor_dim
,
const
int
dim_sizes
[],
const
mluOpDataType_t
tensor_dtype
,
int
position
);
MLUOpTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
const
mluOpDataType_t
tensor_dtype
);
MLUOpTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
const
mluOpDataType_t
tensor_dtype
,
const
mluOpTensorLayout_t
layout
);
MLUOpTensorDesc
(
const
int
tensor_dim
,
const
int64_t
dim_sizes
[],
const
mluOpDataType_t
tensor_dtype
,
int
position
);
MLUOpTensorDesc
(
const
Tensor
&
tensor
,
const
mluOpTensorLayout_t
layout
,
const
mluOpDataType_t
tensor_dtype
);
explicit
MLUOpTensorDesc
(
const
Tensor
&
tensor
);
MLUOpTensorDesc
(
const
Tensor
&
tensor
,
mluOpTensorLayout_t
layout
,
const
mluOpDataType_t
tensor_dtype
,
int
position
);
MLUOpTensorDesc
(
const
Tensor
&
tensor
,
mluOpTensorLayout_t
layout
,
const
mluOpDataType_t
tensor_dtype
,
int
position
,
float
scale
);
~
MLUOpTensorDesc
();
const
mluOpTensorDescriptor_t
get
()
const
{
return
raw_tensor_desc
;
}
private:
mluOpTensorDescriptor_t
raw_tensor_desc
=
nullptr
;
};
class
MLUCnnlActivationDesc
{
class
MLUCnnlActivationDesc
{
public:
public:
MLUCnnlActivationDesc
(
const
MLUCnnlActivationDesc
&
desc
)
=
delete
;
MLUCnnlActivationDesc
(
const
MLUCnnlActivationDesc
&
desc
)
=
delete
;
...
...
paddle/fluid/platform/device/mlu/device_context.cc
浏览文件 @
3e1e482b
...
@@ -28,11 +28,13 @@ MLUContext::MLUContext(const MLUPlace& place, const int priority) {
...
@@ -28,11 +28,13 @@ MLUContext::MLUContext(const MLUPlace& place, const int priority) {
MLUDeviceGuard
guard
(
place_
.
device
);
MLUDeviceGuard
guard
(
place_
.
device
);
stream_
.
reset
(
new
stream
::
MLUStream
(
place_
,
priority
));
stream_
.
reset
(
new
stream
::
MLUStream
(
place_
,
priority
));
InitCNNLContext
();
InitCNNLContext
();
InitMLUOPContext
();
}
}
MLUContext
::~
MLUContext
()
{
MLUContext
::~
MLUContext
()
{
MLUDeviceGuard
guard
(
place_
.
device
);
MLUDeviceGuard
guard
(
place_
.
device
);
DestoryCNNLContext
();
DestoryCNNLContext
();
DestoryMLUOPContext
();
}
}
MLUDeviceContext
::
MLUDeviceContext
(
MLUPlace
place
)
:
place_
(
place
)
{
MLUDeviceContext
::
MLUDeviceContext
(
MLUPlace
place
)
:
place_
(
place
)
{
...
@@ -41,6 +43,7 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
...
@@ -41,6 +43,7 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
driver_version_
=
GetMLUDriverVersion
(
place_
.
device
);
driver_version_
=
GetMLUDriverVersion
(
place_
.
device
);
runtime_version_
=
GetMLURuntimeVersion
(
place_
.
device
);
runtime_version_
=
GetMLURuntimeVersion
(
place_
.
device
);
cnnl_version_
=
GetMLUCnnlVersion
(
place_
.
device
);
cnnl_version_
=
GetMLUCnnlVersion
(
place_
.
device
);
mluOp_version_
=
GetMLUOpVersion
(
place_
.
device
);
LOG_FIRST_N
(
WARNING
,
1
)
LOG_FIRST_N
(
WARNING
,
1
)
<<
"Please NOTE: device: "
<<
static_cast
<
int
>
(
place_
.
device
)
<<
"Please NOTE: device: "
<<
static_cast
<
int
>
(
place_
.
device
)
...
@@ -51,7 +54,9 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
...
@@ -51,7 +54,9 @@ MLUDeviceContext::MLUDeviceContext(MLUPlace place) : place_(place) {
<<
", Runtime API Version: "
<<
runtime_version_
/
10000
<<
"."
<<
", Runtime API Version: "
<<
runtime_version_
/
10000
<<
"."
<<
(
runtime_version_
/
100
)
%
100
<<
"."
<<
runtime_version_
%
100
<<
(
runtime_version_
/
100
)
%
100
<<
"."
<<
runtime_version_
%
100
<<
", Cnnl API Version: "
<<
cnnl_version_
/
10000
<<
"."
<<
", Cnnl API Version: "
<<
cnnl_version_
/
10000
<<
"."
<<
(
cnnl_version_
/
100
)
%
100
<<
"."
<<
cnnl_version_
%
100
;
<<
(
cnnl_version_
/
100
)
%
100
<<
"."
<<
cnnl_version_
%
100
<<
", MluOp API Version: "
<<
mluOp_version_
/
10000
<<
"."
<<
(
mluOp_version_
/
100
)
%
100
<<
"."
<<
mluOp_version_
%
100
;
default_ctx_
.
reset
(
new
MLUContext
(
place_
));
default_ctx_
.
reset
(
new
MLUContext
(
place_
));
}
}
...
@@ -70,6 +75,10 @@ mluCnnlHandle MLUDeviceContext::cnnl_handle() const {
...
@@ -70,6 +75,10 @@ mluCnnlHandle MLUDeviceContext::cnnl_handle() const {
return
context
()
->
CnnlHandle
();
return
context
()
->
CnnlHandle
();
}
}
// Returns the mluOp handle held by this device context's MLUContext.
mluOpHandle MLUDeviceContext::mluOp_handle() const {
  const auto mlu_ctx = context();
  return mlu_ctx->MluOpHandle();
}
mluStream
MLUDeviceContext
::
stream
()
const
{
return
context
()
->
RawStream
();
}
mluStream
MLUDeviceContext
::
stream
()
const
{
return
context
()
->
RawStream
();
}
#endif
#endif
...
...
paddle/fluid/platform/device/mlu/device_context.h
浏览文件 @
3e1e482b
...
@@ -53,12 +53,19 @@ class MLUContext {
...
@@ -53,12 +53,19 @@ class MLUContext {
const
mluCnnlHandle
&
CnnlHandle
()
const
{
return
cnnl_handle_
;
}
const
mluCnnlHandle
&
CnnlHandle
()
const
{
return
cnnl_handle_
;
}
// Read-only access to the mluOp handle stored in this context.
const mluOpHandle& MluOpHandle() const {
  return this->mluOp_handle_;
}
private:
private:
void
InitCNNLContext
()
{
void
InitCNNLContext
()
{
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlCreate
(
&
cnnl_handle_
));
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlCreate
(
&
cnnl_handle_
));
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlSetQueue
(
cnnl_handle_
,
RawStream
()));
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlSetQueue
(
cnnl_handle_
,
RawStream
()));
}
}
void
InitMLUOPContext
()
{
PADDLE_ENFORCE_MLU_SUCCESS
(
mluOpCreate
(
&
mluOp_handle_
));
PADDLE_ENFORCE_MLU_SUCCESS
(
mluOpSetQueue
(
mluOp_handle_
,
RawStream
()));
}
void
DestoryCNNLContext
()
{
void
DestoryCNNLContext
()
{
if
(
cnnl_handle_
)
{
if
(
cnnl_handle_
)
{
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlDestroy
(
cnnl_handle_
));
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlDestroy
(
cnnl_handle_
));
...
@@ -66,10 +73,18 @@ class MLUContext {
...
@@ -66,10 +73,18 @@ class MLUContext {
cnnl_handle_
=
nullptr
;
cnnl_handle_
=
nullptr
;
}
}
// Destroys the mluOp handle when one is held; the stored handle is cleared to
// nullptr in every case.
void DestoryMLUOPContext() {
  const bool has_handle = mluOp_handle_ ? true : false;
  if (has_handle) {
    PADDLE_ENFORCE_MLU_SUCCESS(mluOpDestroy(mluOp_handle_));
  }
  mluOp_handle_ = nullptr;
}
MLUPlace
place_
;
MLUPlace
place_
;
std
::
unique_ptr
<
Eigen
::
DefaultDevice
>
eigen_device_
;
std
::
unique_ptr
<
Eigen
::
DefaultDevice
>
eigen_device_
;
std
::
unique_ptr
<
stream
::
MLUStream
>
stream_
;
std
::
unique_ptr
<
stream
::
MLUStream
>
stream_
;
mluCnnlHandle
cnnl_handle_
;
mluCnnlHandle
cnnl_handle_
;
mluOpHandle
mluOp_handle_
;
DISABLE_COPY_AND_ASSIGN
(
MLUContext
);
DISABLE_COPY_AND_ASSIGN
(
MLUContext
);
};
};
...
@@ -89,6 +104,9 @@ class MLUDeviceContext : public DeviceContext {
...
@@ -89,6 +104,9 @@ class MLUDeviceContext : public DeviceContext {
/*! \brief Return cnnl handle in the device context. */
/*! \brief Return cnnl handle in the device context. */
mluCnnlHandle
cnnl_handle
()
const
;
mluCnnlHandle
cnnl_handle
()
const
;
/*! \brief Return mluOp handle in the device context. */
mluOpHandle
mluOp_handle
()
const
;
/*! \brief Return mlu stream in the device context. */
/*! \brief Return mlu stream in the device context. */
mluStream
stream
()
const
;
mluStream
stream
()
const
;
...
@@ -135,6 +153,7 @@ class MLUDeviceContext : public DeviceContext {
...
@@ -135,6 +153,7 @@ class MLUDeviceContext : public DeviceContext {
int
driver_version_
;
int
driver_version_
;
int
runtime_version_
;
int
runtime_version_
;
int
cnnl_version_
;
int
cnnl_version_
;
int
mluOp_version_
;
MLUPlace
place_
;
MLUPlace
place_
;
std
::
shared_ptr
<
MLUContext
>
default_ctx_
;
std
::
shared_ptr
<
MLUContext
>
default_ctx_
;
...
...
paddle/fluid/platform/device/mlu/enforce.h
浏览文件 @
3e1e482b
...
@@ -41,6 +41,7 @@ struct MLUStatusType {};
...
@@ -41,6 +41,7 @@ struct MLUStatusType {};
DEFINE_MLU_STATUS_TYPE
(
cnrtStatus
,
cnrtSuccess
,
CNRT
);
DEFINE_MLU_STATUS_TYPE
(
cnrtStatus
,
cnrtSuccess
,
CNRT
);
DEFINE_MLU_STATUS_TYPE
(
cnnlStatus
,
CNNL_STATUS_SUCCESS
,
CNNL
);
DEFINE_MLU_STATUS_TYPE
(
cnnlStatus
,
CNNL_STATUS_SUCCESS
,
CNNL
);
DEFINE_MLU_STATUS_TYPE
(
mluOpStatus
,
MLUOP_STATUS_SUCCESS
,
MLUOP
);
DEFINE_MLU_STATUS_TYPE
(
cnStatus
,
CN_SUCCESS
,
CN
);
DEFINE_MLU_STATUS_TYPE
(
cnStatus
,
CN_SUCCESS
,
CN
);
#ifdef PADDLE_WITH_CNCL
#ifdef PADDLE_WITH_CNCL
DEFINE_MLU_STATUS_TYPE
(
cnclStatus
,
CNCL_RET_SUCCESS
,
CNCL
);
DEFINE_MLU_STATUS_TYPE
(
cnclStatus
,
CNCL_RET_SUCCESS
,
CNCL
);
...
@@ -68,6 +69,15 @@ inline std::string build_mlu_error_msg(cnnlStatus stat) {
...
@@ -68,6 +69,15 @@ inline std::string build_mlu_error_msg(cnnlStatus stat) {
return
sout
.
str
();
return
sout
.
str
();
}
}
/*************** MLU OP ERROR ***************/
// Returns true when `stat` is anything other than MLUOP_STATUS_SUCCESS.
inline bool is_error(mluOpStatus stat) {
  const bool succeeded = (stat == MLUOP_STATUS_SUCCESS);
  return !succeeded;
}
// Formats an mluOp status as "MLU OP error(<status>), <error string>. ",
// where the error string comes from mluOpGetErrorString.
inline std::string build_mlu_error_msg(mluOpStatus stat) {
  std::ostringstream message;
  message << "MLU OP error(" << stat << "), ";
  message << mluOpGetErrorString(stat);
  message << ". ";
  return message.str();
}
/*************** CN API ERROR ***************/
/*************** CN API ERROR ***************/
inline
bool
is_error
(
cnStatus
stat
)
{
return
stat
!=
CN_SUCCESS
;
}
inline
bool
is_error
(
cnStatus
stat
)
{
return
stat
!=
CN_SUCCESS
;
}
...
...
paddle/fluid/platform/device/mlu/mlu_info.cc
浏览文件 @
3e1e482b
...
@@ -126,6 +126,13 @@ int GetMLUCnnlVersion(int id) {
...
@@ -126,6 +126,13 @@ int GetMLUCnnlVersion(int id) {
return
x
*
10000
+
y
*
100
+
z
;
return
x
*
10000
+
y
*
100
+
z
;
}
}
// Returns the mluOp library version packed into one integer as
// first * 10000 + second * 100 + third, where the three components are the
// values written by mluOpGetLibVersion. `id` is only validated with
// CheckDeviceId; it is not passed to the library query.
int GetMLUOpVersion(int id) {
  CheckDeviceId(id);
  int ver_first, ver_second, ver_third;
  mluOpGetLibVersion(&ver_first, &ver_second, &ver_third);
  const int packed = ver_first * 10000 + ver_second * 100 + ver_third;
  return packed;
}
int
GetMLUCurrentDeviceId
()
{
int
GetMLUCurrentDeviceId
()
{
int
device_id
;
int
device_id
;
PADDLE_ENFORCE_MLU_SUCCESS
(
cnrtGetDevice
(
&
device_id
));
PADDLE_ENFORCE_MLU_SUCCESS
(
cnrtGetDevice
(
&
device_id
));
...
...
paddle/fluid/platform/device/mlu/mlu_info.h
浏览文件 @
3e1e482b
...
@@ -16,10 +16,11 @@ limitations under the License. */
...
@@ -16,10 +16,11 @@ limitations under the License. */
#ifdef PADDLE_WITH_MLU
#ifdef PADDLE_WITH_MLU
#include <cn_api.h>
#include <cn_api.h>
#include <cndrv_id.h>
#include <cnnl.h>
#include <cnnl.h>
#include <cnpapi.h>
#include <cnpapi.h>
#include <cnpapi_cndrv_id.h>
#include <cnrt.h>
#include <cnrt.h>
#include <mlu_op.h>
#ifdef PADDLE_WITH_CNCL
#ifdef PADDLE_WITH_CNCL
#include <cncl.h>
#include <cncl.h>
#endif
#endif
...
@@ -30,11 +31,13 @@ namespace paddle {
...
@@ -30,11 +31,13 @@ namespace paddle {
using
cnStatus
=
CNresult
;
using
cnStatus
=
CNresult
;
using
cnrtStatus
=
cnrtRet_t
;
using
cnrtStatus
=
cnrtRet_t
;
using
cnnlStatus
=
cnnlStatus_t
;
using
cnnlStatus
=
cnnlStatus_t
;
using
mluOpStatus
=
mluOpStatus_t
;
#ifdef PADDLE_WITH_CNCL
#ifdef PADDLE_WITH_CNCL
using
cnclStatus
=
cnclResult_t
;
using
cnclStatus
=
cnclResult_t
;
#endif
#endif
using
mluStream
=
cnrtQueue_t
;
using
mluStream
=
cnrtQueue_t
;
using
mluCnnlHandle
=
cnnlHandle_t
;
using
mluCnnlHandle
=
cnnlHandle_t
;
using
mluOpHandle
=
mluOpHandle_t
;
using
mluEventHandle
=
cnrtNotifier_t
;
using
mluEventHandle
=
cnrtNotifier_t
;
using
mluDeviceHandle
=
CNdev
;
using
mluDeviceHandle
=
CNdev
;
...
@@ -49,6 +52,9 @@ int GetMLURuntimeVersion(int id);
...
@@ -49,6 +52,9 @@ int GetMLURuntimeVersion(int id);
//! Get the cnnl version of the ith MLU.
//! Get the cnnl version of the ith MLU.
int
GetMLUCnnlVersion
(
int
id
);
int
GetMLUCnnlVersion
(
int
id
);
//! Get the mluOp version of the ith MLU.
int
GetMLUOpVersion
(
int
id
);
//! Get the total number of MLU devices in system.
//! Get the total number of MLU devices in system.
int
GetMLUDeviceCount
();
int
GetMLUDeviceCount
();
...
...
python/paddle/fluid/tests/unittests/mlu/test_grid_sampler_op_mlu.py
浏览文件 @
3e1e482b
...
@@ -186,16 +186,15 @@ class TestGridSamplerOp(OpTest):
...
@@ -186,16 +186,15 @@ class TestGridSamplerOp(OpTest):
self
.
mode
=
"bilinear"
self
.
mode
=
"bilinear"
# TODO(fwg): Test this case when cnnl support align_corners = True.
class
Case1
(
TestGridSamplerOp
):
# class Case1(TestGridSamplerOp):
#
def
initTestCase
(
self
):
# def initTestCase(self):
self
.
x_shape
=
(
2
,
3
,
5
,
6
)
# self.x_shape = (2, 3, 5, 6)
self
.
grid_shape
=
(
2
,
8
,
9
,
2
)
# self.grid_shape = (2, 8, 9, 2)
self
.
theta_shape
=
(
2
,
2
,
3
)
# self.theta_shape = (2, 2, 3)
self
.
align_corners
=
True
# self.align_corners = True
self
.
padding_mode
=
"zeros"
# self.padding_mode = "zeros"
self
.
mode
=
"bilinear"
# self.mode = "bilinear"
class
LargeInputCase
(
TestGridSamplerOp
):
class
LargeInputCase
(
TestGridSamplerOp
):
...
@@ -209,16 +208,16 @@ class LargeInputCase(TestGridSamplerOp):
...
@@ -209,16 +208,16 @@ class LargeInputCase(TestGridSamplerOp):
self
.
mode
=
"bilinear"
self
.
mode
=
"bilinear"
# TODO(fwg): Test this case when cnnl support align_corners = True.
class
Case2
(
LargeInputCase
):
# class Case2(LargeInputCase):
#
def
initTestCase
(
self
):
# def initTestCase(self):
self
.
x_shape
=
(
2
,
3
,
128
,
128
)
# self.x_shape = (2, 3, 128, 128
)
self
.
grid_shape
=
(
2
,
130
,
130
,
2
)
# self.grid_shape = (2, 130, 130, 2
)
self
.
theta_shape
=
(
2
,
2
,
3
)
# self.theta_shape = (2, 2, 3)
self
.
align_corners
=
True
# self.align_corners = True
self
.
padding_mode
=
"zeros"
# self.padding_mode = "zeros
"
self
.
mode
=
"bilinear
"
# self.mode = "bilinear"
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/mlu/test_reduce_sum_op_mlu.py
浏览文件 @
3e1e482b
...
@@ -152,6 +152,11 @@ class TestKeepDim8DReduce(TestMLUReduceSumOp):
...
@@ -152,6 +152,11 @@ class TestKeepDim8DReduce(TestMLUReduceSumOp):
self
.
axis
=
(
3
,
4
,
5
)
self
.
axis
=
(
3
,
4
,
5
)
self
.
keep_dim
=
True
self
.
keep_dim
=
True
def test_check_grad(self):
    """Check the gradient of 'Out' w.r.t. 'X' on self.place, allowing a
    maximum relative error of 0.03."""
    inputs_to_check = ['X']
    self.check_grad_with_place(self.place,
                               inputs_to_check,
                               'Out',
                               max_relative_error=0.03)
class
TestReduceAll
(
TestMLUReduceSumOp
):
class
TestReduceAll
(
TestMLUReduceSumOp
):
...
...
tools/dockerfile/Dockerfile.mlu
浏览文件 @
3e1e482b
# An image for building paddle binaries
# An image for building paddle binaries
# Update CNTOOLKIT_VERSION, CNNL_VERSION
and CNCL
_VERSION if using other versions
# Update CNTOOLKIT_VERSION, CNNL_VERSION
, CNCL_VERSION and MLUOPS
_VERSION if using other versions
#
#
# Build:
# Build:
# - CNTOOLKIT_VERSION 2.8.5
# - CNTOOLKIT_VERSION 3.0.2-1
# - CNNL_VERSION 1.10.5
# - CNNL_VERSION 1.13.0-1
# - CNCL_VERSION 1.1.2
# - CNCL_VERSION 1.2.1-1
# - MLUOPS_VERSION 0.2.0-1
#
#
# Download three packages from FTP (need to connect cambricon AE to get FTP url)
# Download four packages from FTP (need to contact Cambricon AE to get the FTP url)
# - cntoolkit_2.8.5.ubuntu18.04_amd64.deb
# - cntoolkit_3.0.2-1.ubuntu18.04_amd64.deb
# - cnnl_1.10.5.ubuntu18.04_amd64.deb
# - cnnl_1.13.0-1.ubuntu18.04_amd64.deb
# - cncl_1.1.2.ubuntu18.04_amd64.deb
# - cncl_1.2.1-1.ubuntu18.04_amd64.deb
# - mluops_0.2.0-1.ubuntu18.04_amd64.deb
# copy them to current directory first, then run build commands
# copy them to current directory first, then run build commands
#
#
# For example:
# For example:
...
@@ -19,11 +21,13 @@
...
@@ -19,11 +21,13 @@
# (get cntoolkit pkg)
# (get cntoolkit pkg)
# (get cnnl pkg)
# (get cnnl pkg)
# (get cncl pkg)
# (get cncl pkg)
# (get mluops pkg)
#
#
# docker build -f Dockerfile.mlu \
# docker build -f Dockerfile.mlu \
# --build-arg CNTOOLKIT_VERSION=2.8.5 \
# --build-arg CNTOOLKIT_VERSION=3.0.2-1 \
# --build-arg CNNL_VERSION=1.10.5 \
# --build-arg CNNL_VERSION=1.13.0-1 \
# --build-arg CNCL_VERSION=1.1.2 \
# --build-arg CNCL_VERSION=1.2.1-1 \
# --build-arg MLUOPS_VERSION=0.2.0-1 \
# -t paddlepaddle/paddle:latest-dev-mlu .
# -t paddlepaddle/paddle:latest-dev-mlu .
#
#
# without mlu device:
# without mlu device:
...
@@ -40,12 +44,14 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
...
@@ -40,12 +44,14 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ENV WITH_GPU=OFF
ENV WITH_GPU=OFF
ARG CNTOOLKIT_VERSION=2.8.5
ARG CNTOOLKIT_VERSION=3.0.2-1
ARG CNNL_VERSION=1.10.5
ARG CNNL_VERSION=1.13.0-1
ARG CNCL_VERSION=1.1.2
ARG CNCL_VERSION=1.2.1-1
ARG MLUOPS_VERSION=0.2.0-1
ARG CNTOOLKIT_PKG=cntoolkit_$CNTOOLKIT_VERSION.ubuntu18.04_amd64.deb
ARG CNTOOLKIT_PKG=cntoolkit_$CNTOOLKIT_VERSION.ubuntu18.04_amd64.deb
ARG CNNL_PKG=cnnl_$CNNL_VERSION.ubuntu18.04_amd64.deb
ARG CNNL_PKG=cnnl_$CNNL_VERSION.ubuntu18.04_amd64.deb
ARG CNCL_PKG=cncl_$CNCL_VERSION.ubuntu18.04_amd64.deb
ARG CNCL_PKG=cncl_$CNCL_VERSION.ubuntu18.04_amd64.deb
ARG MLUOPS_PKG=mluops_$MLUOPS_VERSION.ubuntu18.04_amd64.deb
# install cntoolkit
# install cntoolkit
COPY $CNTOOLKIT_PKG ./
COPY $CNTOOLKIT_PKG ./
...
@@ -67,6 +73,11 @@ COPY $CNCL_PKG ./
...
@@ -67,6 +73,11 @@ COPY $CNCL_PKG ./
RUN dpkg -i $CNCL_PKG && \
RUN dpkg -i $CNCL_PKG && \
rm -f $CNCL_PKG
rm -f $CNCL_PKG
# install mluops
COPY $MLUOPS_PKG ./
RUN dpkg -i $MLUOPS_PKG && \
rm -f $MLUOPS_PKG
# Clean
# Clean
RUN apt-get clean -y
RUN apt-get clean -y
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录