Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
91631492
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
91631492
编写于
9月 07, 2022
作者:
H
houj04
提交者:
GitHub
9月 07, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[XPU] move rnn op to phi. (#45822)
上级
a9cc0274
变更
6
展开全部
显示空白变更内容
内联
并排
Showing
6 changed file
with
623 addition
and
571 deletion
+623
-571
paddle/fluid/operators/rnn_op_xpu.cc
paddle/fluid/operators/rnn_op_xpu.cc
+0
-571
paddle/fluid/platform/device/npu/CMakeLists.txt
paddle/fluid/platform/device/npu/CMakeLists.txt
+6
-0
paddle/phi/kernels/funcs/math_function.cc
paddle/phi/kernels/funcs/math_function.cc
+13
-0
paddle/phi/kernels/xpu/rnn_grad_kernel.cc
paddle/phi/kernels/xpu/rnn_grad_kernel.cc
+326
-0
paddle/phi/kernels/xpu/rnn_kernel.cc
paddle/phi/kernels/xpu/rnn_kernel.cc
+229
-0
paddle/phi/kernels/xpu/rnn_util.h
paddle/phi/kernels/xpu/rnn_util.h
+49
-0
未找到文件。
paddle/fluid/operators/rnn_op_xpu.cc
已删除
100644 → 0
浏览文件 @
a9cc0274
此差异已折叠。
点击以展开。
paddle/fluid/platform/device/npu/CMakeLists.txt
浏览文件 @
91631492
...
@@ -31,3 +31,9 @@ if(WITH_ASCEND_CL)
...
@@ -31,3 +31,9 @@ if(WITH_ASCEND_CL)
SRCS npu_op_runner.cc
SRCS npu_op_runner.cc
DEPS operator npu_info
)
DEPS operator npu_info
)
endif
()
endif
()
# Every source file that includes "dnnl.h" must depend on mkldnn,
# or, at the very least, the first such target should depend on mkldnn.
if(WITH_MKLDNN)
  add_dependencies(npu_collective_helper mkldnn)
endif()
paddle/phi/kernels/funcs/math_function.cc
浏览文件 @
91631492
...
@@ -67,6 +67,19 @@ template struct SetConstant<paddle::platform::XPUDeviceContext,
...
@@ -67,6 +67,19 @@ template struct SetConstant<paddle::platform::XPUDeviceContext,
phi
::
dtype
::
complex
<
float
>
>
;
phi
::
dtype
::
complex
<
float
>
>
;
template
struct
SetConstant
<
paddle
::
platform
::
XPUDeviceContext
,
template
struct
SetConstant
<
paddle
::
platform
::
XPUDeviceContext
,
phi
::
dtype
::
complex
<
double
>
>
;
phi
::
dtype
::
complex
<
double
>
>
;
// Explicit instantiations of SetConstant for phi::XPUContext, one per
// supported element type.
template struct SetConstant<phi::XPUContext, phi::dtype::float16>;
template struct SetConstant<phi::XPUContext, phi::dtype::bfloat16>;
template struct SetConstant<phi::XPUContext, float>;
template struct SetConstant<phi::XPUContext, double>;
template struct SetConstant<phi::XPUContext, uint8_t>;
template struct SetConstant<phi::XPUContext, int16_t>;
template struct SetConstant<phi::XPUContext, int>;
template struct SetConstant<phi::XPUContext, int64_t>;
template struct SetConstant<phi::XPUContext, bool>;
template struct SetConstant<phi::XPUContext, phi::dtype::complex<float>>;
template struct SetConstant<phi::XPUContext, phi::dtype::complex<double>>;
#endif
#endif
#define DEFINE_CPU_TRANS(RANK) \
#define DEFINE_CPU_TRANS(RANK) \
...
...
paddle/phi/kernels/xpu/rnn_grad_kernel.cc
0 → 100644
浏览文件 @
91631492
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/rnn_grad_kernel.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/xpu/rnn_util.h"
namespace
phi
{
/**
 * Backward pass of the multi-layer, optionally bidirectional LSTM on XPU.
 *
 * Layers are walked from the last one down to the first. Each layer calls
 * xpu::lstm_grad (or xpu::bilstm_grad when is_bidirec is true), consuming the
 * gradient produced by the layer above and emitting the gradient with respect
 * to its own input, its initial states and its weights.
 *
 * @param dev_ctx          Device context used for allocation and XDNN calls.
 * @param x                Input of the first layer; dims()[0..2] are read as
 *                         seq_len, batch_size and input_dim.
 * @param pre_state        [0] initial hidden state, [1] initial cell state.
 * @param weight_list      Flat weight/bias list, regrouped per layer by
 *                         reset_parameter_vector.
 * @param sequence_length  Optional per-sample lengths; when absent every
 *                         sample uses seq_len.
 * @param out              Forward output of the last layer.
 * @param dropout_state    Not read by this implementation.
 * @param reserve          Workspace written by the forward kernel. It is
 *                         addressed here as 4 * num_layers blocks of gate
 *                         data, then num_layers blocks of cell data, then the
 *                         intermediate layer outputs.
 * @param out_grad         Gradient of `out`.
 * @param state_grad       [0] gradient of last hidden state, [1] gradient of
 *                         last cell state.
 * @param dropout_prob     Intermediate layer inputs are rebuilt by scaling
 *                         the saved hidden data with (1 - dropout_prob).
 * @param is_bidirec       Selects bilstm_grad and doubles direction_num.
 * @param input_size       Not read by this implementation.
 * @param hidden_size      Hidden width of every layer.
 * @param num_layers       Number of stacked layers.
 * @param mode             Must be "LSTM"; anything else raises
 *                         InvalidArgument.
 * @param seed             Not read by this implementation.
 * @param is_test          Not read by this implementation.
 * @param x_grad           Output: gradient of `x` (resized to x.dims()).
 * @param pre_state_grad   Optional outputs: [0] gradient of the initial hidden
 *                         state, [1] gradient of the initial cell state.
 * @param weight_grad_list Outputs: gradients laid out like weight_list.
 */
template <typename T, typename Context>
void RnnGradKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const std::vector<const DenseTensor*>& pre_state,
                   const std::vector<const DenseTensor*>& weight_list,
                   const paddle::optional<DenseTensor>& sequence_length,
                   const DenseTensor& out,
                   const DenseTensor& dropout_state,
                   const DenseTensor& reserve,
                   const DenseTensor& out_grad,
                   const std::vector<const DenseTensor*>& state_grad,
                   float dropout_prob,
                   bool is_bidirec,
                   int input_size,
                   int hidden_size,
                   int num_layers,
                   const std::string& mode,
                   int seed,
                   bool is_test,
                   DenseTensor* x_grad,
                   std::vector<DenseTensor*> pre_state_grad,
                   std::vector<DenseTensor*> weight_grad_list) {
  using XPUTyp = typename XPUTypeTrait<T>::Type;

  PADDLE_ENFORCE_EQ(
      mode,
      "LSTM",
      errors::InvalidArgument(
          "XPU only support LSTM mode now, current mode is %s", mode));

  auto init_h = pre_state[0];
  auto init_c = pre_state[1];
  auto last_h_grad = state_grad[0];
  auto last_c_grad = state_grad[1];

  // Pick up the optional output tensors for the initial-state gradients.
  DenseTensor* init_h_grad = nullptr;
  DenseTensor* init_c_grad = nullptr;
  if (pre_state_grad.size() > 0) {  // has gradient
    init_h_grad = pre_state_grad[0];
    init_c_grad = pre_state_grad[1];
  }

  // check shape
  const int seq_len = x.dims()[0];
  const int batch_size = x.dims()[1];
  const int input_dim = x.dims()[2];
  const int direction_num = is_bidirec ? 2 : 1;

  PADDLE_ENFORCE_EQ(
      init_h->dims()[0],
      num_layers * direction_num,
      errors::InvalidArgument("The num_layers of in RNN layer must"
                              " be the same as first dim of init "
                              "hidden, but received num_layers:%d,"
                              " dim:%d",
                              num_layers,
                              init_h->dims()[0]));

  PADDLE_ENFORCE_EQ(
      init_c->dims()[0],
      num_layers * direction_num,
      errors::InvalidArgument(
          "The num_layers of in RNN layer must"
          " be the same as first dim of cell state hidden, but received"
          " num_layers:%d, dim:%d",
          num_layers,
          init_c->dims()[0]));

  // Regroup the flat weight list into one pointer vector per layer.
  std::vector<std::vector<const T*>> parameter_lists;
  parameter_lists.resize(num_layers);
  reset_parameter_vector(weight_list, num_layers, is_bidirec, &parameter_lists);

  // Weight gradients must be allocated before their data pointers are taken.
  for (unsigned int i = 0; i < weight_grad_list.size(); ++i) {
    dev_ctx.template Alloc<T>(weight_grad_list[i]);
  }
  std::vector<std::vector<T*>> parameter_lists_grad;
  parameter_lists_grad.resize(num_layers);
  reset_parameter_vector(
      weight_grad_list, num_layers, is_bidirec, &parameter_lists_grad);

  // Allocate x_grad and initialize it to zero.
  x_grad->Resize(x.dims());
  dev_ctx.template Alloc<T>(x_grad);
  phi::funcs::SetConstant<phi::XPUContext, T> zero;
  zero(dev_ctx, x_grad, static_cast<T>(0.0));

  // When the caller did not ask for initial-state gradients, the XDNN call
  // still needs somewhere to write them, so fall back to local scratch
  // tensors.
  DenseTensor a, b;
  DenseTensor* dynamic_grad_pre_h = &a;
  DenseTensor* dynamic_grad_pre_c = &b;
  if (init_h_grad) {
    init_h_grad->Resize(last_h_grad->dims());
    dev_ctx.template Alloc<T>(init_h_grad);
    zero(dev_ctx, init_h_grad, static_cast<T>(0.0));
  } else {
    dynamic_grad_pre_h->Resize(last_h_grad->dims());
    dev_ctx.template Alloc<T>(dynamic_grad_pre_h);
    zero(dev_ctx, dynamic_grad_pre_h, static_cast<T>(0.0));
    init_h_grad = dynamic_grad_pre_h;
  }
  if (init_c_grad) {
    init_c_grad->Resize(last_c_grad->dims());
    dev_ctx.template Alloc<T>(init_c_grad);
  } else {
    // Sized from the cell-state gradient, matching the branch above.
    dynamic_grad_pre_c->Resize(last_c_grad->dims());
    dev_ctx.template Alloc<T>(dynamic_grad_pre_c);
    init_c_grad = dynamic_grad_pre_c;
  }

  // Ping-pong buffers carrying the input gradient from layer i to layer
  // i - 1. They hold gradients for the inputs of layers > 0, whose last
  // dimension is direction_num * hidden_size, i.e. the shape of `out` and not
  // the shape of `x`. Sizing them from x_grad would overflow whenever
  // input_dim < direction_num * hidden_size.
  DenseTensor temp_input_grad_1, temp_input_grad_2;
  T* input_grad_1_ptr = nullptr;
  T* input_grad_2_ptr = nullptr;
  if (num_layers >= 2) {
    temp_input_grad_1.Resize(out.dims());
    input_grad_1_ptr = dev_ctx.template Alloc<T>(&temp_input_grad_1);
  }
  if (num_layers >= 3) {
    temp_input_grad_2.Resize(out.dims());
    input_grad_2_ptr = dev_ctx.template Alloc<T>(&temp_input_grad_2);
  }

  // get ptr from tensor
  auto x_data = x.data<T>();
  auto init_h_ptr = init_h->data<T>();
  auto init_c_ptr = init_c->data<T>();
  auto y = out.data<T>();
  auto y_grad = out_grad.data<T>();
  auto last_h_grad_ptr = last_h_grad->data<T>();
  auto last_c_grad_ptr = last_c_grad->data<T>();
  auto x_grad_data = x_grad->data<T>();
  auto init_h_grad_ptr = init_h_grad->data<T>();
  auto init_c_grad_ptr = init_c_grad->data<T>();

  // Workspace addressing: gate data first, then cell data, then the saved
  // outputs of the intermediate layers.
  const int block_size = direction_num * seq_len * batch_size * hidden_size;
  auto i_f_g_o_ptr = reserve.data<T>();
  auto c_ptr = i_f_g_o_ptr + num_layers * block_size * 4;
  auto hidden_data_ptr = c_ptr + num_layers * block_size * 1;
  int state_offset = pre_state[0]->dims()[1] * pre_state[0]->dims()[2];

  bool has_seq_length = sequence_length.is_initialized();
  std::vector<int> seq_len_tensor(batch_size, seq_len);
  if (has_seq_length) {
    seq_len_tensor = paddle::operators::GetDataFromTensor<int>(
        sequence_length.get_ptr());
  }

  for (int i = num_layers - 1; i >= 0; --i) {
    // The per-layer inputs and outputs were saved by the forward pass.
    auto w_x = parameter_lists[i][0];
    auto w_h = parameter_lists[i][1];
    auto i_f_g_o = i_f_g_o_ptr + i * block_size * 4;
    auto c = c_ptr + i * block_size;

    // Input of this layer: x for the first layer, otherwise the saved output
    // of the layer below scaled by (1 - dropout_prob).
    DenseTensor layer_input_t;
    const T* layer_input = x_data;
    if (i > 0) {
      layer_input_t.Resize(out.dims());
      T* scaled_input = dev_ctx.template Alloc<T>(&layer_input_t);
      float scale = static_cast<float>(1.0f - dropout_prob);
      auto hidden_data = hidden_data_ptr + (i - 1) * block_size;
      int r = xpu::scale(dev_ctx.x_context(),
                         reinterpret_cast<const XPUTyp*>(hidden_data),
                         reinterpret_cast<XPUTyp*>(scaled_input),
                         out.numel(),
                         false,
                         scale,
                         0.0f);
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
      layer_input = scaled_input;
    }

    // Output of this layer: `out` for the last layer, otherwise the saved
    // hidden data.
    const T* layer_output = y;
    if (i != num_layers - 1) {
      layer_output = hidden_data_ptr + i * block_size;
    }

    // Gradient flowing into this layer from above.
    const T* cur_input_ptr = nullptr;
    if (i == num_layers - 1) {
      cur_input_ptr = y_grad;
    } else if (i % 2 != 0) {
      cur_input_ptr = input_grad_2_ptr;
    } else {
      cur_input_ptr = input_grad_1_ptr;
    }

    // Gradient this layer produces for its own input.
    T* cur_output_ptr = nullptr;
    int cur_xdim = -1;
    if (i == 0) {
      cur_output_ptr = x_grad_data;
      cur_xdim = input_dim;
    } else if (i % 2 != 0) {
      cur_output_ptr = input_grad_1_ptr;
      cur_xdim = is_bidirec ? 2 * hidden_size : hidden_size;
    } else {
      cur_output_ptr = input_grad_2_ptr;
      cur_xdim = is_bidirec ? 2 * hidden_size : hidden_size;
    }

    auto w_x_grad = parameter_lists_grad[i][0];
    auto w_h_grad = parameter_lists_grad[i][1];
    auto b_x_grad = parameter_lists_grad[i][2];
    auto b_h_grad = parameter_lists_grad[i][3];

    auto h_0 = init_h_ptr + direction_num * i * state_offset;
    auto c_0 = init_c_ptr + direction_num * i * state_offset;

    auto h_0_grad = init_h_grad_ptr + direction_num * i * state_offset;
    auto c_0_grad = init_c_grad_ptr + direction_num * i * state_offset;
    auto h_t_grad = last_h_grad_ptr + direction_num * i * state_offset;
    auto c_t_grad = last_c_grad_ptr + direction_num * i * state_offset;

    if (is_bidirec) {
      // Backward-direction entries exist only for bidirectional nets: each
      // per-layer vector holds 4 * direction_num pointers, so indices 4..7
      // must not be touched in the unidirectional case.
      auto bw_x = parameter_lists[i][4];
      auto bw_h = parameter_lists[i][5];
      auto bw_x_grad = parameter_lists_grad[i][4];
      auto bw_h_grad = parameter_lists_grad[i][5];
      auto bb_x_grad = parameter_lists_grad[i][6];
      auto bb_h_grad = parameter_lists_grad[i][7];

      int r = xpu::bilstm_grad<T, T, int16_t>(dev_ctx.x_context(),
                                              layer_input,
                                              h_0,
                                              c_0,
                                              w_x,
                                              w_h,
                                              bw_x,
                                              bw_h,
                                              layer_output,
                                              cur_input_ptr,
                                              h_t_grad,
                                              c_t_grad,
                                              cur_output_ptr,
                                              h_0_grad,
                                              c_0_grad,
                                              w_x_grad,
                                              w_h_grad,
                                              b_x_grad,
                                              b_h_grad,
                                              bw_x_grad,
                                              bw_h_grad,
                                              bb_x_grad,
                                              bb_h_grad,
                                              batch_size,
                                              cur_xdim,
                                              hidden_size,
                                              seq_len,
                                              seq_len_tensor,
                                              nullptr,
                                              nullptr,
                                              nullptr,
                                              nullptr,
                                              nullptr,
                                              nullptr,
                                              i_f_g_o,
                                              c);
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "bilstm_grad");
    } else {
      int r = xpu::lstm_grad<T, T, int16_t>(dev_ctx.x_context(),
                                            layer_input,
                                            h_0,
                                            c_0,
                                            w_x,
                                            w_h,
                                            layer_output,
                                            cur_input_ptr,
                                            h_t_grad,
                                            c_t_grad,
                                            cur_output_ptr,
                                            h_0_grad,
                                            c_0_grad,
                                            w_x_grad,
                                            w_h_grad,
                                            b_x_grad,
                                            b_h_grad,
                                            batch_size,
                                            cur_xdim,
                                            hidden_size,
                                            seq_len,
                                            seq_len_tensor,
                                            nullptr,
                                            nullptr,
                                            nullptr,
                                            nullptr,
                                            i_f_g_o,
                                            c);
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "lstm_grad");
    }
  }
}
}
// namespace phi
// Register phi::RnnGradKernel as the `rnn_grad` kernel for the XPU backend
// (all layouts); float is the only element type listed.
PD_REGISTER_KERNEL(rnn_grad, XPU, ALL_LAYOUT, phi::RnnGradKernel, float) {}
paddle/phi/kernels/xpu/rnn_kernel.cc
0 → 100644
浏览文件 @
91631492
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/rnn_kernel.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/xpu/rnn_util.h"
namespace
phi
{
/**
 * Forward pass of the multi-layer, optionally bidirectional LSTM on XPU.
 *
 * Layers are evaluated bottom-up with xpu::lstm_train (or xpu::bilstm_train
 * when is_bidirec is true). Every layer except the last writes its output
 * into the `reserve` workspace, where the next layer reads it; the last layer
 * writes into `out`.
 *
 * @param dev_ctx          Device context used for allocation and XDNN calls.
 * @param x                Input of the first layer; dims()[0..2] are read as
 *                         seq_len, batch_size and input_dim.
 * @param pre_state        [0] initial hidden state, [1] initial cell state.
 * @param weight_list      Flat weight/bias list, regrouped per layer by
 *                         reset_parameter_vector.
 * @param sequence_length  Optional per-sample lengths; when absent every
 *                         sample uses seq_len.
 * @param dropout_prob     Not read by this implementation.
 * @param is_bidirec       Selects bilstm_train and doubles direction_num.
 * @param input_size       Not read by this implementation.
 * @param hidden_size      Hidden width of every layer.
 * @param num_layers       Number of stacked layers.
 * @param mode             Must be "LSTM"; anything else raises
 *                         InvalidArgument.
 * @param seed             Not read by this implementation.
 * @param is_test          Not read by this implementation.
 * @param out              Output of the last layer.
 * @param dropout_state    Resized to out->dims() and filled with ones.
 * @param state            [0] last hidden state, [1] last cell state.
 * @param reserve          Workspace resized to
 *                         {(num_layers - 1) + 5 * num_layers, block_size}.
 */
template <typename T, typename Context>
void RnnKernel(const Context& dev_ctx,
               const DenseTensor& x,
               const std::vector<const DenseTensor*>& pre_state,
               const std::vector<const DenseTensor*>& weight_list,
               const paddle::optional<DenseTensor>& sequence_length,
               float dropout_prob,
               bool is_bidirec,
               int input_size,
               int hidden_size,
               int num_layers,
               const std::string& mode,
               int seed,
               bool is_test,
               DenseTensor* out,
               DenseTensor* dropout_state,
               std::vector<DenseTensor*> state,
               DenseTensor* reserve) {
  using XPUTyp = typename XPUTypeTrait<T>::Type;

  // Drop a previously initialized mask whose element count no longer matches
  // the output.
  if (dropout_state->IsInitialized() &&
      dropout_state->numel() != out->numel()) {
    dropout_state->clear();
  }
  dropout_state->Resize(out->dims());
  // NOTE(review): the mask is allocated with element type T but filled via a
  // uint8_t SetConstant; confirm which dtype is intended for dropout_state.
  dev_ctx.template Alloc<T>(dropout_state);
  phi::funcs::SetConstant<phi::XPUContext, uint8_t> fill_ones;
  fill_ones(dev_ctx, dropout_state, static_cast<uint8_t>(1));

  PADDLE_ENFORCE_EQ(
      mode,
      "LSTM",
      errors::InvalidArgument(
          "XPU only support LSTM mode now, current mode is %s", mode));

  const DenseTensor* init_h = pre_state[0];
  const DenseTensor* init_c = pre_state[1];
  DenseTensor* last_h = state[0];
  DenseTensor* last_c = state[1];

  // Shape bookkeeping; dims()[0] of x is the number of time steps.
  const int seq_len = x.dims()[0];
  const int batch_size = x.dims()[1];
  const int input_dim = x.dims()[2];
  const int direction_num = is_bidirec ? 2 : 1;

  PADDLE_ENFORCE_EQ(
      init_h->dims()[0],
      num_layers * direction_num,
      errors::InvalidArgument("The num_layers of in RNN layer must"
                              " be the same as first dim of init "
                              "hidden, but received num_layers:%d,"
                              " dim:%d",
                              num_layers,
                              init_h->dims()[0]));

  PADDLE_ENFORCE_EQ(
      init_c->dims()[0],
      num_layers * direction_num,
      errors::InvalidArgument(
          "The num_layers of in RNN layer must"
          " be the same as first dim of cell state hidden, but received"
          " num_layers:%d, dim:%d",
          num_layers,
          init_c->dims()[0]));

  // Regroup the flat weight list into one pointer vector per layer.
  std::vector<std::vector<const T*>> parameter_lists;
  parameter_lists.resize(num_layers);
  reset_parameter_vector(weight_list, num_layers, is_bidirec, &parameter_lists);

  // Allocate the outputs.
  dev_ctx.template Alloc<T>(out);
  dev_ctx.template Alloc<T>(last_h);
  dev_ctx.template Alloc<T>(last_c);

  // Workspace rows: 4 gate blocks + 1 cell block per layer, plus one block
  // for the output of every layer except the last.
  const int gate_num = 4;
  const int reserve_rows = (num_layers - 1) + (gate_num + 1) * num_layers;
  const int block_size = direction_num * seq_len * batch_size * hidden_size;
  reserve->Resize({reserve_rows, block_size});
  dev_ctx.template Alloc<T>(reserve);

  // Raw data pointers.
  const T* x_data = x.data<T>();
  const T* init_h_ptr = init_h->data<T>();
  const T* init_c_ptr = init_c->data<T>();
  T* y = out->data<T>();
  T* last_h_ptr = last_h->data<T>();
  T* last_c_ptr = last_c->data<T>();
  T* i_f_g_o_ptr = reserve->data<T>();
  T* c_ptr = i_f_g_o_ptr + num_layers * block_size * 4;    // past the gates
  T* hidden_data_ptr = c_ptr + num_layers * block_size * 1;  // past the cells

  std::vector<int> seq_len_tensor(batch_size, seq_len);
  if (sequence_length.is_initialized()) {
    seq_len_tensor = paddle::operators::GetDataFromTensor<int>(
        sequence_length.get_ptr());
  }

  const int state_offset =
      pre_state[0]->dims()[1] * pre_state[0]->dims()[2];

  for (int layer = 0; layer < num_layers; ++layer) {
    T* gates = i_f_g_o_ptr + layer * block_size * 4;
    T* cell = c_ptr + layer * block_size;

    // Only the last layer writes into `out`; the others write into the
    // workspace so that the next layer can read them.
    T* layer_out = y;
    if (layer != num_layers - 1) {
      layer_out = hidden_data_ptr + layer * block_size;
    }

    const T* layer_in = x_data;
    int layer_in_dim = input_dim;
    if (layer != 0) {
      layer_in = hidden_data_ptr + (layer - 1) * block_size;
      layer_in_dim = is_bidirec ? 2 * hidden_size : hidden_size;
    }

    const int state_shift = direction_num * layer * state_offset;
    const T* h_0 = init_h_ptr + state_shift;
    const T* c_0 = init_c_ptr + state_shift;
    T* layer_last_h = last_h_ptr + state_shift;
    T* layer_last_c = last_c_ptr + state_shift;

    const std::vector<const T*>& params = parameter_lists[layer];
    const T* w_x = params[0];
    const T* w_h = params[1];
    const T* b_x = params[2];
    const T* b_h = params[3];

    if (!is_bidirec) {
      int r = xpu::lstm_train<T, T, int16_t>(dev_ctx.x_context(),
                                             layer_in,
                                             h_0,
                                             c_0,
                                             w_x,
                                             w_h,
                                             b_x,
                                             b_h,
                                             layer_out,
                                             layer_last_h,
                                             layer_last_c,
                                             batch_size,
                                             layer_in_dim,
                                             hidden_size,
                                             seq_len,
                                             seq_len_tensor,
                                             nullptr,
                                             nullptr,
                                             nullptr,
                                             nullptr,
                                             gates,
                                             cell,
                                             xpu::Activation_t::TANH,
                                             xpu::Activation_t::SIGMOID);
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "lstm_train");
    } else {
      // Entries 4..7 hold the backward-direction weights and biases.
      const T* bw_x = params[4];
      const T* bw_h = params[5];
      const T* bb_x = params[6];
      const T* bb_h = params[7];

      int r = xpu::bilstm_train<T, T, int16_t>(dev_ctx.x_context(),
                                               layer_in,
                                               h_0,
                                               c_0,
                                               w_x,
                                               w_h,
                                               b_x,
                                               b_h,
                                               bw_x,
                                               bw_h,
                                               bb_x,
                                               bb_h,
                                               layer_out,
                                               layer_last_h,
                                               layer_last_c,
                                               batch_size,
                                               layer_in_dim,
                                               hidden_size,
                                               seq_len,
                                               seq_len_tensor,
                                               nullptr,
                                               nullptr,
                                               nullptr,
                                               nullptr,
                                               nullptr,
                                               nullptr,
                                               gates,
                                               cell);
      PADDLE_ENFORCE_XDNN_SUCCESS(r, "bilstm_train");
    }
  }
}
}
// namespace phi
// Register phi::RnnKernel as the `rnn` kernel for the XPU backend
// (all layouts); float is the only element type listed.
PD_REGISTER_KERNEL(rnn, XPU, ALL_LAYOUT, phi::RnnKernel, float) {}
paddle/phi/kernels/xpu/rnn_util.h
0 → 100644
浏览文件 @
91631492
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
namespace
phi
{
/**
 * Regroups the flat RNN parameter list into one pointer vector per layer.
 *
 * The raw sequence is [FWhi, FWhh, BWhi, BWhh] * num_layers followed by
 * [FBhi, FBhh, BBhi, BBhh] * num_layers. For every layer the result is
 * [FWhi, FWhh, FBhi, FBhh] followed, for bidirectional nets, by
 * [BWhi, BWhh, BBhi, BBhh].
 *
 * @param raw_params_vec  Flat list of tensor handles; each element must
 *                        support `->template data<U>()`.
 * @param num_layers      Number of layers; params_vec must already hold at
 *                        least this many rows (rows are accessed with at()).
 * @param is_bidirec      Whether backward-direction parameters are present.
 * @param params_vec      Output: each row is resized to 4 (or 8 when
 *                        bidirectional) and filled with data pointers.
 */
template <typename TensorType, typename T>
void reset_parameter_vector(const std::vector<TensorType>& raw_params_vec,
                            const int& num_layers,
                            const bool& is_bidirec,
                            std::vector<std::vector<T*>>* params_vec) {
  using ValueType = typename std::remove_cv<T>::type;

  const int dirs = is_bidirec ? 2 : 1;
  const int per_layer = 4 * dirs;
  // Biases start halfway through the flat list.
  const int bias_base = (num_layers * per_layer) / 2;

  for (int layer = 0; layer < num_layers; ++layer) {
    auto& slots = params_vec->at(layer);
    slots.resize(per_layer);
    for (int slot = 0; slot < per_layer; ++slot) {
      const int direction = slot / 4;  // 0: forward, 1: backward
      const int kind = slot % 4;       // 0: Whi, 1: Whh, 2: Bhi, 3: Bhh
      const int weight_idx = layer * 2 * dirs + direction * 2 + kind % 2;
      const int src = (kind >= 2) ? weight_idx + bias_base : weight_idx;
      slots[slot] = raw_params_vec[src]->template data<ValueType>();
    }
  }
}
}
// namespace phi
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录