Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
b1adde3d
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b1adde3d
编写于
4月 13, 2022
作者:
z8hanghuan
提交者:
GitHub
4月 13, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
use bilstm_train for rnn forward, * test=kunlun (#41671)
上级
f4cc5def
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
43 addition
and
74 deletion
+43
-74
paddle/fluid/operators/rnn_op_xpu.cc
paddle/fluid/operators/rnn_op_xpu.cc
+43
-74
未找到文件。
paddle/fluid/operators/rnn_op_xpu.cc
浏览文件 @
b1adde3d
...
...
@@ -51,41 +51,6 @@ void reset_parameter_vector(const std::vector<TensorType>& raw_params_vec,
}
}
template
<
typename
DeviceContext
,
typename
T
>
void
RunLSTMLayer
(
const
framework
::
ExecutionContext
&
ctx
,
int
seq_len
,
int
batch_size
,
int
xdim
,
int
hidden_size
,
const
T
*
x
,
T
*
y
,
const
T
*
init_h
,
const
T
*
init_c
,
T
*
last_h
,
T
*
last_c
,
int
state_offset
,
const
std
::
vector
<
int
>&
seq_len_tensor
,
const
std
::
vector
<
const
T
*>&
param_list
,
T
*
i_f_g_o
,
T
*
c
,
bool
is_bidirect
,
int
layer_idx
,
int
offset
)
{
bool
is_reverse
=
false
;
if
(
is_bidirect
)
{
layer_idx
=
2
*
layer_idx
+
offset
;
if
(
offset
>
0
)
{
is_reverse
=
true
;
}
}
auto
w_x
=
param_list
[
0
+
offset
*
4
];
auto
w_h
=
param_list
[
1
+
offset
*
4
];
auto
b_x
=
param_list
[
2
+
offset
*
4
];
auto
b_h
=
param_list
[
3
+
offset
*
4
];
auto
h_0
=
init_h
+
layer_idx
*
state_offset
;
auto
c_0
=
init_c
+
layer_idx
*
state_offset
;
auto
last_h_ptr
=
last_h
+
layer_idx
*
state_offset
;
auto
last_c_ptr
=
last_c
+
layer_idx
*
state_offset
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
int
r
=
xpu
::
lstm_train
<
T
,
T
,
int16_t
>
(
dev_ctx
.
x_context
(),
(
const
T
*
)
x
,
(
const
T
*
)
h_0
,
(
const
T
*
)
c_0
,
(
const
T
*
)
w_x
,
(
const
T
*
)
w_h
,
(
const
T
*
)
b_x
,
(
const
T
*
)
b_h
,
reinterpret_cast
<
T
*>
(
y
),
reinterpret_cast
<
T
*>
(
last_h_ptr
),
reinterpret_cast
<
T
*>
(
last_c_ptr
),
batch_size
,
xdim
,
hidden_size
,
seq_len
,
seq_len_tensor
,
is_reverse
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
reinterpret_cast
<
T
*>
(
i_f_g_o
),
reinterpret_cast
<
T
*>
(
c
),
xpu
::
Activation_t
::
TANH
,
xpu
::
Activation_t
::
SIGMOID
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"lstm_train"
);
}
template
<
typename
DeviceContext
,
typename
T
>
class
RnnXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -184,9 +149,9 @@ class RnnXPUKernel : public framework::OpKernel<T> {
auto
y
=
output
->
data
<
T
>
();
auto
last_h_ptr
=
last_h
->
data
<
T
>
();
auto
last_c_ptr
=
last_c
->
data
<
T
>
();
auto
i_f_g_o
=
reserve_data
->
data
<
T
>
();
auto
c
=
i_f_g_o
+
auto
i_f_g_o
_ptr
=
reserve_data
->
data
<
T
>
();
auto
c
_ptr
=
i_f_g_o
_ptr
+
num_layers
*
direction_num
*
seq_len
*
batch_size
*
hidden_size
*
4
;
std
::
vector
<
int
>
seq_len_tensor
(
batch_size
,
seq_len
);
...
...
@@ -197,11 +162,12 @@ class RnnXPUKernel : public framework::OpKernel<T> {
int
state_offset
=
pre_state
[
0
]
->
dims
()[
1
]
*
pre_state
[
0
]
->
dims
()[
2
];
for
(
int
i
=
0
;
i
<
num_layers
;
i
++
)
{
auto
i_f_g_o
=
i_f_g_o_ptr
+
i
*
direction_num
*
seq_len
*
batch_size
*
hidden_size
*
4
;
auto
c
=
c_ptr
+
i
*
direction_num
*
seq_len
*
batch_size
*
hidden_size
;
const
T
*
cur_input_ptr
=
nullptr
;
int
cur_xdim
=
-
1
;
i_f_g_o
+=
i
*
direction_num
*
seq_len
*
batch_size
*
hidden_size
*
4
;
c
+=
i
*
direction_num
*
seq_len
*
batch_size
*
hidden_size
;
if
(
i
==
0
)
{
cur_input_ptr
=
x
;
cur_xdim
=
input_dim
;
...
...
@@ -222,41 +188,44 @@ class RnnXPUKernel : public framework::OpKernel<T> {
cur_output_ptr
=
internal_output_1_ptr
;
}
auto
h_0
=
init_h_ptr
+
direction_num
*
i
*
state_offset
;
auto
c_0
=
init_c_ptr
+
direction_num
*
i
*
state_offset
;
auto
last_h
=
last_h_ptr
+
direction_num
*
i
*
state_offset
;
auto
last_c
=
last_c_ptr
+
direction_num
*
i
*
state_offset
;
auto
w_x
=
parameter_lists
[
i
][
0
];
auto
w_h
=
parameter_lists
[
i
][
1
];
auto
b_x
=
parameter_lists
[
i
][
2
];
auto
b_h
=
parameter_lists
[
i
][
3
];
if
(
is_bidirec
)
{
std
::
vector
<
Tensor
>
output_vec
(
2
);
std
::
vector
<
T
*>
output_ptr_vec
(
2
);
for
(
int
k
=
0
;
k
<
2
;
++
k
)
{
output_vec
[
k
].
Resize
({
seq_len
,
batch_size
,
output
->
dims
()[
2
]
/
2
});
output_ptr_vec
[
k
]
=
output_vec
[
k
].
mutable_data
<
T
>
(
ctx
.
GetPlace
());
}
RunLSTMLayer
<
DeviceContext
,
T
>
(
ctx
,
seq_len
,
batch_size
,
cur_xdim
,
hidden_size
,
cur_input_ptr
,
output_ptr_vec
[
0
],
init_h_ptr
,
init_c_ptr
,
last_h_ptr
,
last_c_ptr
,
state_offset
,
seq_len_tensor
,
parameter_lists
[
i
],
i_f_g_o
,
c
,
is_bidirec
,
i
,
0
);
T
*
bw_i_f_g_o
=
i_f_g_o
+
seq_len
*
batch_size
*
hidden_size
*
4
;
T
*
bw_c
=
c
+
seq_len
*
batch_size
*
hidden_size
;
RunLSTMLayer
<
DeviceContext
,
T
>
(
ctx
,
seq_len
,
batch_size
,
cur_xdim
,
hidden_size
,
cur_input_ptr
,
output_ptr_vec
[
1
],
init_h_ptr
,
init_c_ptr
,
last_h_ptr
,
last_c_ptr
,
state_offset
,
seq_len_tensor
,
parameter_lists
[
i
],
bw_i_f_g_o
,
bw_c
,
is_bidirec
,
i
,
1
);
// concat
int
r
=
xpu
::
concat
<
T
>
(
dev_ctx
.
x_context
(),
{
output_ptr_vec
[
0
],
output_ptr_vec
[
1
]},
cur_output_ptr
,
{{
seq_len
,
batch_size
,
hidden_size
},
{
seq_len
,
batch_size
,
hidden_size
}},
2
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"concat"
);
xpu_wait
(
dev_ctx
.
x_context
()
->
xpu_stream
);
auto
bw_x
=
parameter_lists
[
i
][
4
];
auto
bw_h
=
parameter_lists
[
i
][
5
];
auto
bb_x
=
parameter_lists
[
i
][
6
];
auto
bb_h
=
parameter_lists
[
i
][
7
];
int
r
=
xpu
::
bilstm_train
<
T
,
T
,
int16_t
>
(
dev_ctx
.
x_context
(),
(
const
T
*
)
cur_input_ptr
,
(
const
T
*
)
h_0
,
(
const
T
*
)
c_0
,
(
const
T
*
)
w_x
,
(
const
T
*
)
w_h
,
(
const
T
*
)
b_x
,
(
const
T
*
)
b_h
,
(
const
T
*
)
bw_x
,
(
const
T
*
)
bw_h
,
(
const
T
*
)
bb_x
,
(
const
T
*
)
bb_h
,
reinterpret_cast
<
T
*>
(
cur_output_ptr
),
reinterpret_cast
<
T
*>
(
last_h
),
reinterpret_cast
<
T
*>
(
last_c
),
batch_size
,
cur_xdim
,
hidden_size
,
seq_len
,
seq_len_tensor
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
reinterpret_cast
<
T
*>
(
i_f_g_o
),
reinterpret_cast
<
T
*>
(
c
));
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"bilstm_train"
);
}
else
{
RunLSTMLayer
<
DeviceContext
,
T
>
(
ctx
,
seq_len
,
batch_size
,
cur_xdim
,
hidden_size
,
cur_input_ptr
,
cur_output_ptr
,
init_h_ptr
,
init_c_ptr
,
last_h_ptr
,
last_c_ptr
,
state_offset
,
seq_len_tensor
,
parameter_lists
[
i
],
i_f_g_o
,
c
,
is_bidirec
,
i
,
0
);
int
r
=
xpu
::
lstm_train
<
T
,
T
,
int16_t
>
(
dev_ctx
.
x_context
(),
(
const
T
*
)
cur_input_ptr
,
(
const
T
*
)
h_0
,
(
const
T
*
)
c_0
,
(
const
T
*
)
w_x
,
(
const
T
*
)
w_h
,
(
const
T
*
)
b_x
,
(
const
T
*
)
b_h
,
reinterpret_cast
<
T
*>
(
cur_output_ptr
),
reinterpret_cast
<
T
*>
(
last_h
),
reinterpret_cast
<
T
*>
(
last_c
),
batch_size
,
cur_xdim
,
hidden_size
,
seq_len
,
seq_len_tensor
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
reinterpret_cast
<
T
*>
(
i_f_g_o
),
reinterpret_cast
<
T
*>
(
c
),
xpu
::
Activation_t
::
TANH
,
xpu
::
Activation_t
::
SIGMOID
);
PADDLE_ENFORCE_XDNN_SUCCESS
(
r
,
"lstm_train"
);
}
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录