Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
2616d51a
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2616d51a
编写于
6月 30, 2022
作者:
C
Chenxiao Niu
提交者:
GitHub
6月 30, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MLU] add rnn forward kernel. (#43894)
上级
1efc80c6
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
865 additions
and
0 deletions
+865
-0
paddle/fluid/operators/mlu/mlu_baseop.cc
paddle/fluid/operators/mlu/mlu_baseop.cc
+173
-0
paddle/fluid/operators/mlu/mlu_baseop.h
paddle/fluid/operators/mlu/mlu_baseop.h
+113
-0
paddle/fluid/operators/rnn_op_mlu.cc
paddle/fluid/operators/rnn_op_mlu.cc
+371
-0
python/paddle/fluid/tests/unittests/mlu/test_rnn_op_mlu.py
python/paddle/fluid/tests/unittests/mlu/test_rnn_op_mlu.py
+208
-0
未找到文件。
paddle/fluid/operators/mlu/mlu_baseop.cc
浏览文件 @
2616d51a
...
...
@@ -620,6 +620,80 @@ MLUCnnlDCNDesc::~MLUCnnlDCNDesc() {
}
}
// Creates a CNNL sequence-data descriptor and configures it in one step.
//
// All arguments are forwarded unchanged to cnnlSetSeqDataDescriptor. Both
// CNNL calls are wrapped in PADDLE_ENFORCE_MLU_SUCCESS.
MLUSeqDataDesc::MLUSeqDataDesc(cnnlSeqDataLayout_t layout,
                               cnnlDataType_t dtype,
                               int dimNb,
                               const int dimSize[],
                               int seqLengthArraySize,
                               const int seqLengthArray[],
                               void* paddingFill) {
  // Step 1: allocate the descriptor object owned by this wrapper.
  PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateSeqDataDescriptor(&seq_data_desc_));

  // Step 2: fill it with the caller-provided layout information.
  PADDLE_ENFORCE_MLU_SUCCESS(
      cnnlSetSeqDataDescriptor(seq_data_desc_,
                               layout,
                               dtype,
                               dimNb,
                               dimSize,
                               seqLengthArraySize,
                               seqLengthArray,
                               paddingFill));
}
// Returns the wrapped CNNL sequence-data descriptor handle (not a copy of
// the underlying descriptor object).
const cnnlSeqDataDescriptor_t MLUSeqDataDesc::get() const {
  return seq_data_desc_;
}
// Destroys the wrapped CNNL sequence-data descriptor, if one is held.
MLUSeqDataDesc::~MLUSeqDataDesc() {
  // Nothing to release when no descriptor is held.
  if (!seq_data_desc_) {
    return;
  }
  PADDLE_ENFORCE_MLU_SUCCESS(cnnlDestroySeqDataDescriptor(seq_data_desc_));
}
// Creates a CNNL RNN descriptor and configures it through
// cnnlSetRNNDescriptor (the five-argument form).
//
// All arguments are forwarded unchanged; both CNNL calls are wrapped in
// PADDLE_ENFORCE_MLU_SUCCESS.
MLURNNDesc::MLURNNDesc(const int hidden_size,
                       const int num_layers,
                       const cnnlRNNInputMode_t input_mode,
                       const cnnlDirectionMode_t direction,
                       const cnnlRNNMode_t rnn_mode) {
  // Allocate the descriptor first, then configure it.
  PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateRNNDescriptor(&rnn_desc_));
  PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNDescriptor(
      rnn_desc_, hidden_size, num_layers, input_mode, direction, rnn_mode));
}
// Creates a CNNL RNN descriptor and configures it through
// cnnlSetRNNDescriptor_v2.
//
// Every argument is forwarded to CNNL unchanged and in the same order as
// this parameter list; both CNNL calls are wrapped in
// PADDLE_ENFORCE_MLU_SUCCESS.
MLURNNDesc::MLURNNDesc(cnnlRNNMode_t cell_mode,
                       cnnlRNNBiasMode_t bias_mode,
                       cnnlDirectionMode_t direction,
                       cnnlRNNInputMode_t input_mode,
                       cnnlDataType_t data_type,
                       cnnlDataType_t math_prec,
                       int input_size,
                       int hidden_size,
                       int proj_size,
                       int layer_num,
                       void* dropout_desc,
                       cnnlRNNPaddingMode_t padding_mode) {
  // Allocate the descriptor first, then configure it.
  PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateRNNDescriptor(&rnn_desc_));
  PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNDescriptor_v2(rnn_desc_,
                                                     cell_mode,
                                                     bias_mode,
                                                     direction,
                                                     input_mode,
                                                     data_type,
                                                     math_prec,
                                                     input_size,
                                                     hidden_size,
                                                     proj_size,
                                                     layer_num,
                                                     dropout_desc,
                                                     padding_mode));
}
// Returns the wrapped CNNL RNN descriptor handle (not a copy of the
// underlying descriptor object).
const cnnlRNNDescriptor_t MLURNNDesc::get() const {
  return rnn_desc_;
}
// Destroys the wrapped CNNL RNN descriptor, if one is held.
MLURNNDesc::~MLURNNDesc() {
  // Nothing to release when no descriptor is held.
  if (!rnn_desc_) {
    return;
  }
  PADDLE_ENFORCE_MLU_SUCCESS(cnnlDestroyRNNDescriptor(rnn_desc_));
}
/* static */
void
MLUCnnl
::
Active
(
const
ExecutionContext
&
ctx
,
cnnlActivationDescriptor_t
active_desc
,
const
cnnlTensorDescriptor_t
input_desc
,
...
...
@@ -4471,6 +4545,105 @@ MLUCnnlDCNDesc::~MLUCnnlDCNDesc() {
output
));
}
/* static */
void
MLUCnnl
::
RNNForward
(
const
ExecutionContext
&
ctx
,
const
cnnlRNNDescriptor_t
rnn_desc
,
const
int
dev_seq_lengths
[],
const
void
*
weight_param_ptr
,
size_t
weightspace_size
,
const
cnnlSeqDataDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlSeqDataDescriptor_t
y_desc
,
void
*
y
,
const
cnnlTensorDescriptor_t
h_desc
,
const
void
*
hx
,
void
*
hy
,
const
cnnlTensorDescriptor_t
c_desc
,
const
void
*
cx
,
void
*
cy
,
void
*
reservespace_ptr
)
{
cnnlHandle_t
handle
=
GetHandleFromCTX
(
ctx
);
// make sure 1. cnnlSetRNNDescriptor_v2 is invoked
// 2. x_desc is not NULL
PADDLE_ENFORCE_NOT_NULL
(
rnn_desc
,
paddle
::
platform
::
errors
::
Fatal
(
"MLU RNNForward failed. rnn_desc initializing failed."
));
PADDLE_ENFORCE_NOT_NULL
(
x_desc
,
paddle
::
platform
::
errors
::
Fatal
(
"MLU RNNForward failed. x_desc initializing failed."
));
auto
&
dev_ctx
=
GetDevCtxFromCTX
(
ctx
);
size_t
workspace_size
,
reservespace_size
;
Tensor
workspace
;
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlGetRNNTempSizes
(
handle
,
rnn_desc
,
x_desc
,
&
workspace_size
,
&
reservespace_size
));
workspace
=
ctx
.
AllocateTmpTensor
<
int8_t
,
MLUDeviceContext
>
(
{
static_cast
<
int64_t
>
(
workspace_size
)},
dev_ctx
);
void
*
workspace_ptr
=
workspace
.
mutable_data
(
ctx
.
GetPlace
());
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlRNNForwardTraining
(
handle
,
rnn_desc
,
dev_seq_lengths
,
x_desc
,
x
,
y_desc
,
y
,
h_desc
,
hx
,
hy
,
c_desc
,
cx
,
cy
,
weight_param_ptr
,
weightspace_size
,
workspace_ptr
,
workspace_size
,
reservespace_ptr
,
reservespace_size
));
}
/* static */
void
MLUCnnl
::
Mask
(
const
ExecutionContext
&
ctx
,
cnnlMaskedOp_t
masked_mode
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
masked_desc
,
const
void
*
masked
,
const
cnnlTensorDescriptor_t
value_desc
,
const
void
*
value
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
uint32_t
*
number
)
{
cnnlHandle_t
handle
=
GetHandleFromCTX
(
ctx
);
auto
&
dev_ctx
=
GetDevCtxFromCTX
(
ctx
);
size_t
workspace_size
;
Tensor
workspace
;
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlGetMaskedWorkspaceSize
(
handle
,
masked_mode
,
input_desc
,
masked_desc
,
value_desc
,
output_desc
,
&
workspace_size
));
workspace
=
ctx
.
AllocateTmpTensor
<
int8_t
,
MLUDeviceContext
>
(
{
static_cast
<
int64_t
>
(
workspace_size
)},
dev_ctx
);
void
*
workspace_ptr
=
workspace
.
mutable_data
(
ctx
.
GetPlace
());
PADDLE_ENFORCE_MLU_SUCCESS
(
cnnlMasked_v3
(
handle
,
masked_mode
,
input_desc
,
input
,
masked_desc
,
masked
,
value_desc
,
value
,
workspace_ptr
,
workspace_size
,
output_desc
,
output
,
number
));
}
/* static */
void
MLUCnnl
::
BceWithLogits
(
const
ExecutionContext
&
ctx
,
cnnlBceWithLogitsReduction_t
reduction
,
...
...
paddle/fluid/operators/mlu/mlu_baseop.h
浏览文件 @
2616d51a
...
...
@@ -495,6 +495,90 @@ class MLUCnnlDCNDesc {
cnnlDCNDescriptor_t
dcn_desc_
=
nullptr
;
};
// Owning wrapper around a cnnlSeqDataDescriptor_t.
//
// The descriptor is created and configured by the constructor; the
// destructor is declared here and defined out of line. Copying is disabled
// so that a descriptor handle is never shared between two wrappers.
class MLUSeqDataDesc {
 public:
  // Creates the descriptor from a layout, element type, dimensions,
  // per-sequence lengths and an optional padding fill value.
  MLUSeqDataDesc(cnnlSeqDataLayout_t layout,
                 cnnlDataType_t dtype,
                 int dimNb,
                 const int dimSize[],
                 int seqLengthArraySize,
                 const int seqLengthArray[],
                 void* paddingFill);

  ~MLUSeqDataDesc();

  // Non-copyable.
  MLUSeqDataDesc(const MLUSeqDataDesc& desc) = delete;
  MLUSeqDataDesc& operator=(const MLUSeqDataDesc& desc) = delete;

  // Returns the wrapped descriptor handle.
  const cnnlSeqDataDescriptor_t get() const;

 private:
  // Null until the constructor has created the descriptor.
  cnnlSeqDataDescriptor_t seq_data_desc_ = nullptr;
};
// Owning wrapper around a cnnlRNNDescriptor_t.
//
// The descriptor is created by one of the two constructors and can then be
// refined through the Set* helpers, each of which forwards to the matching
// cnnlSetRNN* call wrapped in PADDLE_ENFORCE_MLU_SUCCESS. Copying is
// disabled so that a descriptor handle is never shared between two wrappers.
class MLURNNDesc {
 public:
  // Basic form: configures the descriptor through cnnlSetRNNDescriptor.
  MLURNNDesc(const int hidden_size,
             const int num_layers,
             const cnnlRNNInputMode_t input_mode,
             const cnnlDirectionMode_t direction,
             const cnnlRNNMode_t rnn_mode);

  // Extended form: configures the descriptor through
  // cnnlSetRNNDescriptor_v2.
  MLURNNDesc(cnnlRNNMode_t cell_mode,
             cnnlRNNBiasMode_t bias_mode,
             cnnlDirectionMode_t direction,
             cnnlRNNInputMode_t input_mode,
             cnnlDataType_t data_type,
             cnnlDataType_t math_prec,
             int input_size,
             int hidden_size,
             int proj_size,
             int layer_num,
             void* dropout_desc,
             cnnlRNNPaddingMode_t padding_mode);

  ~MLURNNDesc();

  // Non-copyable.
  MLURNNDesc(const MLURNNDesc& desc) = delete;
  MLURNNDesc& operator=(const MLURNNDesc& desc) = delete;

  // Forwards to cnnlSetRNNProjectionLayers.
  void SetRNNProjectionLayers(const int rec_proj_size,
                              const int out_proj_size) {
    PADDLE_ENFORCE_MLU_SUCCESS(
        cnnlSetRNNProjectionLayers(rnn_desc_, rec_proj_size, out_proj_size));
  }

  // Forwards to cnnlSetRNNPeepholeMode.
  void SetPeepholeMode(const cnnlRNNPeepholeMode_t peephole_mode) {
    PADDLE_ENFORCE_MLU_SUCCESS(
        cnnlSetRNNPeepholeMode(rnn_desc_, peephole_mode));
  }

  // Forwards to cnnlSetRNNBiasMode.
  void SetRNNBiasMode(const cnnlRNNBiasMode_t bias_mode) {
    PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNBiasMode(rnn_desc_, bias_mode));
  }

  // Forwards to cnnlSetRNNMaskMode.
  void SetRNNMaskMode(const cnnlRNNMaskMode_t mask_mode) {
    PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNMaskMode(rnn_desc_, mask_mode));
  }

  // Forwards to cnnlSetRNNClip.
  void SetRNNClip(const cnnlRNNClipMode_t clip_mode,
                  const cnnlNanPropagation_t clip_nan_opt,
                  const double left_clip,
                  const double right_clip) {
    PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNClip(
        rnn_desc_, clip_mode, clip_nan_opt, left_clip, right_clip));
  }

  // Forwards to cnnlSetRNNPaddingMode.
  void SetRNNPaddingMode(const cnnlRNNPaddingMode_t padding_mode) {
    PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNPaddingMode(rnn_desc_, padding_mode));
  }

  // Returns the wrapped descriptor handle.
  const cnnlRNNDescriptor_t get() const;

 private:
  // Null until a constructor has created the descriptor.
  cnnlRNNDescriptor_t rnn_desc_ = nullptr;
};
class
MLUCnnl
{
public:
static
void
Active
(
const
ExecutionContext
&
ctx
,
...
...
@@ -1814,6 +1898,35 @@ class MLUCnnl {
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
);
// Runs the CNNL RNN forward (training) pass for `rnn_desc`.
//
// x/y are described by sequence-data descriptors, hx/hy share `h_desc` and
// cx/cy share `c_desc`. The temporary workspace is allocated internally;
// the reserve space buffer must be supplied by the caller through
// `reservespace_ptr`. `rnn_desc` and `x_desc` must not be null.
static
void
RNNForward
(
const
ExecutionContext
&
ctx
,
const
cnnlRNNDescriptor_t
rnn_desc
,
const
int
dev_seq_lengths
[],
const
void
*
weight_param_ptr
,
size_t
weightspace_size
,
const
cnnlSeqDataDescriptor_t
x_desc
,
const
void
*
x
,
const
cnnlSeqDataDescriptor_t
y_desc
,
void
*
y
,
const
cnnlTensorDescriptor_t
h_desc
,
const
void
*
hx
,
void
*
hy
,
const
cnnlTensorDescriptor_t
c_desc
,
const
void
*
cx
,
void
*
cy
,
void
*
reservespace_ptr
);
// Runs the CNNL masked operation selected by `masked_mode` on `input`,
// using `masked` and `value` as the mask and value operands, and writes the
// result to `output`. The temporary workspace is allocated internally;
// `number` is forwarded to CNNL as-is.
static
void
Mask
(
const
ExecutionContext
&
ctx
,
cnnlMaskedOp_t
masked_mode
,
const
cnnlTensorDescriptor_t
input_desc
,
const
void
*
input
,
const
cnnlTensorDescriptor_t
masked_desc
,
const
void
*
masked
,
const
cnnlTensorDescriptor_t
value_desc
,
const
void
*
value
,
const
cnnlTensorDescriptor_t
output_desc
,
void
*
output
,
uint32_t
*
number
);
static
void
Transform
(
const
ExecutionContext
&
ctx
,
const
void
*
alpha
,
const
void
*
beta
,
...
...
paddle/fluid/operators/rnn_op_mlu.cc
0 → 100644
浏览文件 @
2616d51a
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace
paddle
{
namespace
operators
{
// Convenience aliases for the framework tensor types referenced in this
// translation unit.
using
Tensor
=
framework
::
Tensor
;
using
DDim
=
framework
::
DDim
;
using
TensorList
=
std
::
vector
<
framework
::
Tensor
>
;
// Regroups the flat RNN parameter list into per-layer lists.
//
// The raw parameter sequence is
//   [FWhi, FWhh, BWhi, BWhh] * num_layers   (all weights)
//   + [FBhi, FBhh, BBhi, BBhh] * num_layers (all biases)
// and is rearranged per layer into
//   ([FWhi, FWhh, FBhi, FBhh] + [BWhi, BWhh, BBhi, BBhh]).
// The backward (B*) entries only exist when `is_bidirec` is true.
//
// Each output entry is a (data pointer, size in bytes) pair, where the size
// is numel() * sizeof(T) of the source tensor.
//
// `params_vec` must already hold at least `num_layers` elements; each of
// them is resized here to 4 entries per direction.
template <typename TensorType, typename T>
void reset_parameter_vector(
    const std::vector<TensorType>& raw_params_vec,
    const int& num_layers,
    const bool& is_bidirec,
    std::vector<std::vector<std::pair<const T*, size_t>>>* params_vec) {
  using ElemT = typename std::remove_cv<T>::type;

  const int num_directions = is_bidirec ? 2 : 1;
  const int params_per_layer = 4 * num_directions;
  // Biases are stored after all weights, i.e. in the second half of the raw
  // list.
  const int bias_offset = num_layers * params_per_layer / 2;

  for (int layer = 0; layer < num_layers; ++layer) {
    auto& layer_params = params_vec->at(layer);
    layer_params.resize(params_per_layer);

    for (int slot = 0; slot < params_per_layer; ++slot) {
      const int dir = slot / 4;   // 0: forward, 1: backward
      const int kind = slot % 4;  // 0: Whi, 1: Whh, 2: Bhi, 3: Bhh

      int src_idx = layer * 2 * num_directions + dir * 2 + kind % 2;
      if (kind >= 2) {
        src_idx += bias_offset;
      }

      const auto& src = raw_params_vec[src_idx];
      layer_params[slot] = std::make_pair(src->template data<ElemT>(),
                                          src->numel() * sizeof(T));
    }
  }
}
// MLU forward kernel of the `rnn` operator, implemented on top of the CNNL
// RNN API through the MLUCnnl / MLU*Desc helpers.
//
// Restrictions enforced below: mode must be "LSTM" and num_layers must be 1.
// Both unidirectional and bidirectional configurations are handled.
//
// When the optional "SequenceLength" input is present, every output position
// whose time step is at or beyond the sample's sequence length is
// overwritten with zero after the CNNL call.
//
// NOTE: the "DropoutState" output is not written by this kernel; the code
// that used to fill it is disabled.
template <typename DeviceContext, typename T>
class RNNMLUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto& dev_ctx = GetDevCtxFromCTX(ctx);

    // Inputs
    auto* input = ctx.Input<Tensor>("Input");
    auto pre_state = ctx.MultiInput<Tensor>("PreState");
    auto weight_list = ctx.MultiInput<framework::Tensor>("WeightList");
    const bool has_seq_length = ctx.HasInput("SequenceLength");
    const Tensor* sequence_length =
        has_seq_length ? ctx.Input<Tensor>("SequenceLength") : nullptr;

    // Outputs
    auto state = ctx.MultiOutput<Tensor>("State");
    auto* output = ctx.Output<Tensor>("Out");
    auto* reserve_data = ctx.Output<Tensor>("Reserve");

    // Attributes
    const int& num_layers = ctx.Attr<int>("num_layers");
    const bool& is_bidirec = ctx.Attr<bool>("is_bidirec");
    const int& hidden_size = ctx.Attr<int>("hidden_size");
    const std::string& mode = ctx.Attr<std::string>("mode");

    // ---- Validate the configuration before touching any tensor ----------
    PADDLE_ENFORCE_EQ(
        mode,
        "LSTM",
        platform::errors::InvalidArgument(
            "MLU only support LSTM mode now, current mode is %s", mode));
    PADDLE_ENFORCE_EQ(
        num_layers,
        1,
        platform::errors::InvalidArgument(
            "MLU only support 1 num_layers, current num_layers is %s",
            num_layers));

    // LSTM carries two states (hidden and cell); both lists are indexed
    // with [0] and [1] below, so their sizes must be checked first.
    PADDLE_ENFORCE_EQ(
        pre_state.size(),
        2UL,
        platform::errors::InvalidArgument(
            "The PreState of LSTM must contain 2 tensors (init_h, init_c), "
            "but received %d.",
            pre_state.size()));
    PADDLE_ENFORCE_EQ(
        state.size(),
        2UL,
        platform::errors::InvalidArgument(
            "The State of LSTM must contain 2 tensors (last_h, last_c), "
            "but received %d.",
            state.size()));

    // The input is read as [seq_len, batch_size, input_dim]; a different
    // rank would make the dimension reads below invalid.
    constexpr int kInOutDimNum = 3;
    PADDLE_ENFORCE_EQ(
        input->dims().size(),
        kInOutDimNum,
        platform::errors::InvalidArgument(
            "The rank of Input in RNN op on MLU must be 3, but received %d.",
            input->dims().size()));

    auto init_h = pre_state[0];  // -> hx in cnnl
    auto init_c = pre_state[1];  // -> cx in cnnl
    auto last_h = state[0];      // -> hy in cnnl
    auto last_c = state[1];      // -> cy in cnnl

    // ---- Shapes ----------------------------------------------------------
    const int seq_len = static_cast<int>(input->dims()[0]);  // time_step
    const int batch_size = static_cast<int>(input->dims()[1]);
    const int input_dim = static_cast<int>(input->dims()[2]);
    const int direction_num = is_bidirec ? 2 : 1;
    const int feature_size = direction_num * hidden_size;
    int in_dim_arr[kInOutDimNum] = {seq_len, batch_size, input_dim};
    int out_dim_arr[kInOutDimNum] = {seq_len, batch_size, feature_size};
    const int proj_size = hidden_size;

    // One length per sample: the full seq_len when no SequenceLength input
    // is given, otherwise the per-sample lengths read from that tensor.
    std::vector<int> seq_len_vec(batch_size, seq_len);
    if (has_seq_length) {
      seq_len_vec = operators::GetDataFromTensor(sequence_length);
      // The masking loop at the end indexes seq_len_vec by batch index.
      PADDLE_ENFORCE_EQ(
          seq_len_vec.size(),
          static_cast<size_t>(batch_size),
          platform::errors::InvalidArgument(
              "The number of elements of SequenceLength must be equal to "
              "the batch size, but received %d and %d.",
              seq_len_vec.size(),
              batch_size));
    }

    cnnlDirectionMode_t direction =
        is_bidirec ? CNNL_RNN_BIDIRECTIONAL : CNNL_RNN_UNIDIRECTIONAL;

    PADDLE_ENFORCE_EQ(
        init_h->dims()[0],
        num_layers * direction_num,
        platform::errors::InvalidArgument("The num_layers of in RNN layer must"
                                          " be the same as first dim of init "
                                          "hidden, but received num_layers:%d,"
                                          " dim:%d",
                                          num_layers,
                                          init_h->dims()[0]));
    PADDLE_ENFORCE_EQ(
        init_c->dims()[0],
        num_layers * direction_num,
        platform::errors::InvalidArgument(
            "The num_layers of in RNN layer must"
            " be the same as first dim of cell state hidden, but received"
            " num_layers:%d, dim:%d",
            num_layers,
            init_c->dims()[0]));

    // ---- Regroup the weight list per layer -------------------------------
    std::vector<std::vector<std::pair<const T*, size_t>>> parameter_lists;
    parameter_lists.resize(num_layers);
    reset_parameter_vector(
        weight_list, num_layers, is_bidirec, &parameter_lists);

    // ---- Allocate outputs ------------------------------------------------
    output->mutable_data<T>(ctx.GetPlace());  // -> y in cnnl
    last_h->mutable_data<T>(ctx.GetPlace());  // -> hy in cnnl
    last_c->mutable_data<T>(ctx.GetPlace());  // -> cy in cnnl

    // ---- Descriptors -----------------------------------------------------
    MLUSeqDataDesc input_seq_data_desc(CNNL_SEQDATA_TNC,
                                       ToCnnlDataType(input->dtype()),
                                       kInOutDimNum,
                                       in_dim_arr,
                                       static_cast<int>(seq_len_vec.size()),
                                       seq_len_vec.data(),
                                       nullptr);
    MLUSeqDataDesc out_seq_data_desc(CNNL_SEQDATA_TNC,
                                     ToCnnlDataType(input->dtype()),
                                     kInOutDimNum,
                                     out_dim_arr,
                                     static_cast<int>(seq_len_vec.size()),
                                     seq_len_vec.data(),
                                     nullptr);
    MLUCnnlTensorDesc hx_desc(*init_h);
    MLUCnnlTensorDesc cx_desc(*init_c);

    MLURNNDesc rnn_desc(CNNL_LSTM,
                        CNNL_RNN_DOUBLE_BIAS,
                        direction,
                        CNNL_RNN_LINEAR_INPUT,
                        ToCnnlDataType(input->dtype()),
                        ToCnnlDataType(input->dtype()),
                        input_dim,
                        hidden_size,
                        /*projection*/ proj_size,
                        num_layers,
                        nullptr,
                        CNNL_RNN_PADDED_IO_DISABLED);
    rnn_desc.SetRNNMaskMode(CNNL_LSTM_MASK_ENABLED);

    // ---- Pack the weights into one contiguous weight space ---------------
    size_t weightspace_size = 0;
    PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetRNNWeightSpaceSize(
        GetHandleFromCTX(ctx), rnn_desc.get(), &weightspace_size));

    // parameter_lists[0] holds [w_x, w_h, b_x, b_h] for the forward
    // direction, followed by the same four entries for the backward
    // direction when bidirectional. They are packed back to back in that
    // order.
    const auto& layer_params = parameter_lists[0];
    size_t actual_total_w_size = 0;
    for (const auto& param : layer_params) {
      actual_total_w_size += param.second;
    }

    // Verify the sizes BEFORE copying, so that a mismatch can never turn
    // into an out-of-bounds device write.
    PADDLE_ENFORCE_EQ(weightspace_size,
                      actual_total_w_size,
                      platform::errors::InvalidArgument(
                          "The weightsize doesn't match"
                          " weightspace_size:%d, actual_total_w_size:%d",
                          weightspace_size,
                          actual_total_w_size));

    // weightspace_size is a byte count (it is compared against a sum of
    // numel * sizeof(T) above), so the buffer is allocated in bytes.
    framework::Tensor weightspace =
        ctx.AllocateTmpTensor<int8_t, DeviceContext>(
            {static_cast<int64_t>(weightspace_size)}, dev_ctx);
    void* weightspace_ptr = weightspace.mutable_data(ctx.GetPlace());

    size_t byte_offset = 0;
    for (const auto& param : layer_params) {
      void* dst = static_cast<char*>(weightspace_ptr) + byte_offset;
      memory::Copy(weightspace.place(),
                   dst,
                   weightspace.place(),
                   param.first,
                   param.second,
                   nullptr);
      byte_offset += param.second;
    }

    // ---- Reserve space ---------------------------------------------------
    // NOTE(review): the reserve tensor is sized by this formula, while
    // MLUCnnl::RNNForward passes the reserve size reported by CNNL -- confirm
    // that the two always agree.
    const int gate_num = 4;
    const int hidden_data_idx = (num_layers - 1) + (gate_num + 1) * num_layers;
    const int block_size = direction_num * seq_len * batch_size * hidden_size;
    reserve_data->Resize({hidden_data_idx, block_size});
    reserve_data->mutable_data<T>(ctx.GetPlace());

    // ---- Forward pass ----------------------------------------------------
    MLUCnnl::RNNForward(ctx,
                        rnn_desc.get(),
                        seq_len_vec.data(),
                        weightspace_ptr,
                        weightspace_size,
                        input_seq_data_desc.get(),
                        GetBasePtr(input),
                        out_seq_data_desc.get(),
                        GetBasePtr(output),
                        hx_desc.get(),
                        GetBasePtr(init_h),
                        GetBasePtr(last_h),
                        cx_desc.get(),
                        GetBasePtr(init_c),
                        GetBasePtr(last_c),
                        GetBasePtr(reserve_data));

    if (has_seq_length) {
      // Zero out the part of the output that lies beyond each sample's
      // sequence length.
      auto masked_mode = CNNL_MASKED_FILL;
      float fill_value = 0.0f;

      framework::Tensor fill_value_tensor(input->dtype());
      framework::Tensor masked_tensor(framework::TransToPhiDataType(VT::INT8));
      framework::Tensor h_masked_tensor(
          framework::TransToPhiDataType(VT::INT8));
      fill_value_tensor.Resize({1});
      masked_tensor.Resize({seq_len, batch_size, feature_size});
      h_masked_tensor.Resize({seq_len, batch_size, feature_size});

      fill_value_tensor.mutable_data<T>(ctx.GetPlace());
      masked_tensor.mutable_data<int8_t>(ctx.GetPlace());
      int8_t* h_masked_ptr =
          h_masked_tensor.mutable_data<int8_t>(platform::CPUPlace());

      // Build the mask on the host: 1 marks a padded position. The flag
      // depends only on (t, n), so it is computed once per row.
      for (int t = 0; t < seq_len; ++t) {
        for (int n = 0; n < batch_size; ++n) {
          const int8_t flag = t >= seq_len_vec[n] ? 1 : 0;
          int8_t* row =
              h_masked_ptr +
              (static_cast<int64_t>(t) * batch_size + n) * feature_size;
          for (int c = 0; c < feature_size; ++c) {
            row[c] = flag;
          }
        }
      }

      framework::TensorCopy(
          h_masked_tensor, ctx.GetPlace(), dev_ctx, &masked_tensor);
      // The host mask tensor is local to this scope; wait for the copy to
      // finish before continuing.
      dev_ctx.Wait();

      FillMLUTensorWithHostValue(ctx, fill_value, &fill_value_tensor);
      MLUCnnlTensorDesc fill_value_desc(fill_value_tensor);
      MLUCnnlTensorDesc output_desc(*output);
      MLUCnnlTensorDesc masked_desc(masked_tensor);

      // In-place masked fill: output is both the source and destination.
      MLUCnnl::Mask(ctx,
                    masked_mode,
                    output_desc.get(),
                    GetBasePtr(output),
                    masked_desc.get(),
                    GetBasePtr(&masked_tensor),
                    fill_value_desc.get(),
                    GetBasePtr(&fill_value_tensor),
                    output_desc.get(),
                    GetBasePtr(output),
                    nullptr);
    }
  }
};
}
// namespace operators
}
// namespace paddle
// Register the MLU kernel of the `rnn` operator. Only the float
// instantiation of RNNMLUKernel is registered here.
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_MLU_KERNEL
(
rnn
,
ops
::
RNNMLUKernel
<
paddle
::
platform
::
MLUDeviceContext
,
float
>
);
python/paddle/fluid/tests/unittests/mlu/test_rnn_op_mlu.py
0 → 100644
浏览文件 @
2616d51a
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
math
import
paddle.fluid.core
as
core
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.layers
as
layers
import
random
import
sys
sys
.
path
.
append
(
'..'
)
from
op_test
import
OpTest
sys
.
path
.
append
(
"../rnn"
)
from
rnn_numpy
import
SimpleRNN
,
LSTM
,
GRU
from
convert
import
get_params_for_net
random
.
seed
(
2
)
np
.
set_printoptions
(
threshold
=
np
.
inf
)
paddle
.
enable_static
()
class TestRNNOp(OpTest):
    """Forward check of the MLU ``rnn`` op in LSTM mode.

    The expected output is produced by the numpy ``LSTM`` reference from
    ``rnn_numpy``. Subclasses customize a case by overriding ``set_attrs``
    (and optionally ``init_dtype`` / ``init_size``).
    """

    def get_weight_names(self):
        """Return all weight names followed by all bias names.

        For every layer there are ``2 * direction_num`` weights and the same
        number of biases, named ``"<layer>.weight_<j>"`` / ``"<layer>.bias_<j>"``.
        """
        weight_names = []
        for kind in ("weight", "bias"):
            for i in range(self.num_layers):
                for j in range(0, 2 * self.direction_num):
                    weight_names.append("{}.{}_{}".format(i, kind, j))
        return weight_names

    def setUp(self):
        self.__class__.use_mlu = True
        self.place = paddle.device.MLUPlace(0)
        self.in_type = np.float32
        self.init_dtype()
        self.init_size()
        self.op_type = "rnn"

        # Defaults; set_attrs() may override any of them.
        self.sequence_length = np.array([12, 11, 10, 9, 8], dtype=np.int32)
        self.num_layers = 1
        self.is_bidirec = False
        self.mode = "LSTM"
        self.is_test = False
        self.dropout = 0.0
        self.set_attrs()

        self.direction_num = 2 if self.is_bidirec else 1
        direction = "bidirectional" if self.is_bidirec else "forward"

        input = np.random.uniform(
            low=-0.1,
            high=0.1,
            size=(self.seq_length, self.batch_size,
                  self.input_size)).astype(self.dtype)

        # Zero the padded time steps of every sample. The positions are
        # derived from sequence_length instead of being hard-coded, so the
        # input stays consistent if the lengths or sizes are changed.
        if self.sequence_length is not None:
            for sample, length in enumerate(self.sequence_length):
                input[int(length):, sample, :] = 0

        rnn1 = LSTM(self.input_size,
                    self.hidden_size,
                    num_layers=self.num_layers,
                    time_major=True,
                    direction=direction,
                    dropout=self.dropout,
                    dtype=self.dtype)

        flat_w = get_params_for_net(rnn1)
        output, (last_hidden, last_cell) = rnn1(
            input, sequence_length=self.sequence_length)

        state_shape = (self.num_layers * self.direction_num, self.batch_size,
                       self.hidden_size)
        init_h = np.zeros(state_shape).astype(self.dtype)
        init_c = np.zeros(state_shape).astype(self.dtype)

        # Placeholders for outputs excluded from the comparison (see
        # test_output). Zero-filled so they never hold uninitialized memory.
        state_out = np.zeros(300, dtype="uint8")
        reserve_out = np.zeros(400, dtype="uint8")

        self.inputs = {
            'Input': input,
            'WeightList': flat_w,
            'PreState': [('init_h', init_h), ('init_c', init_c)],
        }
        # SequenceLength is an optional input; only feed it when provided.
        if self.sequence_length is not None:
            self.inputs['SequenceLength'] = self.sequence_length

        self.attrs = {
            'dropout_prob': self.dropout,
            'is_bidirec': self.is_bidirec,
            'input_size': self.input_size,
            'hidden_size': self.hidden_size,
            'num_layers': self.num_layers,
            'mode': self.mode,
            'is_test': self.is_test
        }
        self.outputs = {
            'Out': output,
            "State": [('last_hidden', last_hidden), ('last_cell', last_cell)],
            'Reserve': reserve_out,
            'DropoutState': state_out
        }

    def init_dtype(self):
        """Select the element type used for inputs and reference outputs."""
        self.dtype = self.in_type

    def init_size(self):
        """Set the problem sizes used to build the inputs."""
        self.seq_length = 12
        self.batch_size = 5
        self.input_size = 3
        self.hidden_size = 2

    def test_output(self):
        """Compare 'Out' only; Reserve, DropoutState and State are skipped."""
        self.check_output_with_place(
            self.place, no_check_set=['Reserve', 'DropoutState', 'State'])

    def set_attrs(self):
        """Hook for subclasses to override the defaults set in setUp."""
        pass
# def test_grad(self):
# if not self.is_test:
# var_name_list = self.get_weight_names()
# grad_check_list = ['Input', 'init_h', 'init_c']
# grad_check_list.extend(var_name_list)
# self.check_grad_with_place(self.place, set(grad_check_list),
# ['Out', 'last_hidden', 'last_cell'])
# class TestRNNOp1(TestRNNOp):
# def set_attrs(self):
# self.sequence_length = None
# class TestRNNOp2(TestRNNOp):
# def set_attrs(self):
# self.sequence_length = None
# self.is_bidirec = True
# class TestRNNOp3(TestRNNOp):
# def set_attrs(self):
# self.is_test = True
# self.sequence_length = None
# class TestRNNOp4(TestRNNOp):
# def set_attrs(self):
# self.is_test = True
# self.sequence_length = None
# self.is_bidirec = True
#TODO(chenxiao): cnnl doesn't support num_layers > 1 case
# class TestRNNOp5(TestRNNOp):
# def set_attrs(self):
# self.num_layers = 2
# class TestRNNOp6(TestRNNOp):
# def set_attrs(self):
# self.num_layers = 2
# self.is_bidirec = True
# class TestRNNOp7(TestRNNOp):
# def set_attrs(self):
# self.num_layers = 2
# self.is_bidirec = True
# self.is_test = True
# class TestRNNOp8(TestRNNOp):
# def set_attrs(self):
# self.num_layers = 2
# self.is_bidirec = True
# self.sequence_length = None
# class TestRNNOp9(TestRNNOp):
# def set_attrs(self):
# self.num_layers = 3
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录