Commit d60fe75a
Authored Nov 09, 2017 by dangqingqing

follow comments.

Parent 4098ce73
Showing 3 changed files with 83 additions and 119 deletions (+83 −119)
paddle/operators/lstm_op.cc  +11 −19
paddle/operators/lstm_op.h  +48 −46
python/paddle/v2/framework/tests/test_lstm_op.py  +24 −54
paddle/operators/lstm_op.cc
...
@@ -246,25 +246,17 @@ class LSTMGradOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasInput("BatchCellPreAct"),
                    "Input(BatchGate) of LSTM should not be null.");
-    auto in_g_name = framework::GradVarName("Input");
-    if (ctx->HasOutput(in_g_name))
-      ctx->SetOutputDim(in_g_name, ctx->GetInputDim("Input"));
-
-    auto w_g_name = framework::GradVarName("Weight");
-    if (ctx->HasOutput(w_g_name))
-      ctx->SetOutputDim(w_g_name, ctx->GetInputDim("Weight"));
-
-    auto b_g_name = framework::GradVarName("Bias");
-    if (ctx->HasOutput(b_g_name))
-      ctx->SetOutputDim(b_g_name, ctx->GetInputDim("Bias"));
-
-    auto h0_g_name = framework::GradVarName("H0");
-    if (ctx->HasOutput(h0_g_name))
-      ctx->SetOutputDim(h0_g_name, ctx->GetInputDim("H0"));
-
-    auto c0_g_name = framework::GradVarName("C0");
-    if (ctx->HasOutput(c0_g_name))
-      ctx->SetOutputDim(c0_g_name, ctx->GetInputDim("C0"));
+    auto SetOutGradDim = [&ctx](const std::string& name) {
+      auto g_name = framework::GradVarName(name);
+      if (ctx->HasOutput(g_name))
+        ctx->SetOutputDim(g_name, ctx->GetInputDim(name));
+    };
+
+    SetOutGradDim("Input");
+    SetOutGradDim("Weight");
+    SetOutGradDim("Bias");
+    SetOutGradDim("H0");
+    SetOutGradDim("C0");
   }

  protected:
...
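The change above collapses five copies of the same GradVarName / HasOutput / SetOutputDim sequence into a single SetOutGradDim lambda that captures the shape-inference context by reference. A minimal, self-contained sketch of the same deduplication pattern; Ctx, GradVarName, and the integer "dims" below are toy stand-ins, not Paddle's framework API:

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Toy stand-in for framework::InferShapeContext: maps a variable name to
// its (known) dimension and records output dimensions as they are set.
struct Ctx {
  std::map<std::string, int> input_dims;
  std::map<std::string, int> output_dims;
  bool HasOutput(const std::string& name) const {
    return output_dims.count(name) > 0;
  }
  void SetOutputDim(const std::string& name, int dim) {
    output_dims[name] = dim;
  }
  int GetInputDim(const std::string& name) const {
    return input_dims.at(name);
  }
};

// Toy stand-in for framework::GradVarName.
std::string GradVarName(const std::string& name) { return name + "@GRAD"; }

int main() {
  Ctx ctx;
  ctx.input_dims = {{"Input", 8}, {"Weight", 4}, {"Bias", 2}};
  // Only these gradient outputs are declared; "Bias@GRAD" is absent, so the
  // HasOutput guard in the lambda skips it.
  ctx.output_dims = {{"Input@GRAD", 0}, {"Weight@GRAD", 0}};

  // The refactor in this commit: one lambda replaces five copies of
  // "auto g_name = ...; if (HasOutput(g_name)) SetOutputDim(g_name, ...)".
  auto SetOutGradDim = [&ctx](const std::string& name) {
    auto g_name = GradVarName(name);
    if (ctx.HasOutput(g_name)) ctx.SetOutputDim(g_name, ctx.GetInputDim(name));
  };

  const std::vector<std::string> names = {"Input", "Weight", "Bias"};
  for (const auto& name : names) SetOutGradDim(name);

  for (const auto& kv : ctx.output_dims)
    std::cout << kv.first << " -> " << kv.second << "\n";
  return 0;
}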
paddle/operators/lstm_op.h
...
@@ -28,6 +28,15 @@ template <typename T, int MajorType = Eigen::RowMajor,
                           typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;

+template <typename Place, typename T>
+inline void ReorderInitState(const platform::DeviceContext& ctx,
+                             const framework::Tensor& src, const size_t* index,
+                             framework::Tensor* dst, bool indexed_src) {
+  math::CopyMatrixRowsFunctor<Place, T> row_shuffle;
+  dst->mutable_data<T>(src.dims(), ctx.GetPlace());
+  row_shuffle(ctx, src, index, *dst, indexed_src);
+}
+
 template <typename Place, typename T>
 class LSTMKernel : public framework::OpKernel<T> {
  public:
...
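The new ReorderInitState helper wraps math::CopyMatrixRowsFunctor: with indexed_src true it gathers rows (dst row i comes from src row index[i]), and with indexed_src false it scatters them back (dst row index[i] comes from src row i), which the gradient path uses to undo the reorder. A toy sketch of those semantics on a plain row-major matrix; ReorderRows and the vector-of-vectors layout are illustrative only, not Paddle's types:

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative equivalent of math::CopyMatrixRowsFunctor on a matrix stored
// as vector<vector<int>>: gather rows when indexed_src is true
// (dst[i] = src[index[i]]), scatter when false (dst[index[i]] = src[i]).
void ReorderRows(const std::vector<std::vector<int>>& src,
                 const std::vector<size_t>& index,
                 std::vector<std::vector<int>>* dst, bool indexed_src) {
  dst->resize(src.size());
  for (size_t i = 0; i < src.size(); ++i) {
    if (indexed_src) {
      (*dst)[i] = src[index[i]];
    } else {
      (*dst)[index[i]] = src[i];
    }
  }
}

int main() {
  // One initial-state row per sequence; "order" sorts the sequences by
  // descending length, as the LoD-to-batch conversion does.
  std::vector<std::vector<int>> h0 = {{0, 0}, {1, 1}, {2, 2}};
  std::vector<size_t> order = {2, 0, 1};

  std::vector<std::vector<int>> ordered, restored;
  ReorderRows(h0, order, &ordered, true);        // forward: reorder init state
  ReorderRows(ordered, order, &restored, false);  // backward: undo the reorder

  for (const auto& row : restored) std::cout << row[0] << " ";  // prints 0 1 2
  std::cout << "\n";
  return 0;
}

Gather followed by scatter with the same index is the identity, which is why the backward pass below can hand the reordered gradients (ordered_h0_g, ordered_c0_g) back through the same helper with indexed_src set to false.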
@@ -83,11 +92,13 @@ class LSTMKernel : public framework::OpKernel<T> {
     }
     lstm_value.prevStateValue = nullptr;

     Tensor ordered_c0;
+    const size_t* order = batch_gate->lod()[2].data();
     if (cell_t0) {
-      math::CopyMatrixRowsFunctor<Place, T> row_shuffle;
-      ordered_c0.mutable_data<T>(cell_t0->dims(), ctx.GetPlace());
-      const size_t* order = batch_gate->lod()[2].data();
-      row_shuffle(device_ctx, *cell_t0, order, ordered_c0, true);
+      // Since batch computing for LSTM reorders the input sequences by
+      // length, the initialized cell state also needs to be reordered.
+      ReorderInitState<Place, T>(device_ctx, *cell_t0, order, &ordered_c0,
+                                 true);
       lstm_value.prevStateValue = ordered_c0.data<T>();
     }
...
@@ -123,11 +134,16 @@ class LSTMKernel : public framework::OpKernel<T> {
                                static_cast<T>(1.0), &gate_t,
                                static_cast<T>(1.0));
       } else if (hidden_t0) {
-        math::CopyMatrixRowsFunctor<Place, T> row_shuffle;
         // If n == 0 and there is no initialized hidden state, that is to say
         // the H0 is zeros, the calculation W_h * H0 will be skipped.
         // If n == 0 and there is an initialized hidden state, calculate
         // W_h * H0.
+
+        // Since batch computing for LSTM reorders the input sequences by
+        // length, the initialized hidden state also needs to be reordered.
         Tensor ordered_h0;
-        ordered_h0.mutable_data<T>(hidden_t0->dims(), ctx.GetPlace());
-        const size_t* order = batch_gate->lod()[2].data();
-        row_shuffle(device_ctx, *hidden_t0, order, ordered_h0, true);
+        ReorderInitState<Place, T>(device_ctx, *hidden_t0, order, &ordered_h0,
+                                   true);
         math::matmul<Place, T>(device_ctx, ordered_h0, false, *weight, false,
                                static_cast<T>(1.0), &gate_t,
                                static_cast<T>(1.0));
...
@@ -187,12 +203,16 @@ class LSTMGradKernel : public framework::OpKernel<T> {
       zero(device_ctx, weight_g, static_cast<T>(0.0));
     }

+    // ordered_h0/c0 are the reordered hidden/cell initializations;
+    // ordered_h0_g/c0_g are the reordered gradients of the hidden/cell
+    // initializations.
     Tensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g;
-    math::CopyMatrixRowsFunctor<Place, T> row_shuffle;
     const size_t* order = batch_gate->lod()[2].data();
     if (c0) {
-      ordered_c0.mutable_data<T>(c0->dims(), ctx.GetPlace());
-      row_shuffle(device_ctx, *c0, order, ordered_c0, true);
+      ReorderInitState<Place, T>(device_ctx, *c0, order, &ordered_c0, true);
     }
+    if (c0 && c0_g) {
+      ordered_c0_g.mutable_data<T>(c0_g->dims(), ctx.GetPlace());
+    }

     auto in_dims = input->dims();
...
@@ -231,30 +251,24 @@ class LSTMGradKernel : public framework::OpKernel<T> {
     math::LoDTensor2BatchFunctor<Place, T> to_batch;

-    // use the local variable as here.
-    LoDTensor batch_hidden;
-    batch_hidden.mutable_data<T>(out_dims, ctx.GetPlace());
-    batch_hidden.set_lod(batch_gate->lod());
-    to_batch(device_ctx, *hidden_out, batch_hidden, false);
-
-    LoDTensor batch_hidden_g;
-    batch_hidden_g.mutable_data<T>(out_dims, ctx.GetPlace());
-    batch_hidden_g.set_lod(batch_gate->lod());
-    to_batch(device_ctx, *hidden_g, batch_hidden_g, false);
+    auto ToBatch = [&batch_gate, &to_batch](const platform::DeviceContext& ctx,
+                                            const framework::LoDTensor& src,
+                                            const framework::DDim& dims,
+                                            framework::LoDTensor& dst) {
+      dst.mutable_data<T>(dims, ctx.GetPlace());
+      dst.set_lod(batch_gate->lod());
+      to_batch(ctx, src, dst, false);
+    };

-    LoDTensor batch_cell;
-    batch_cell.mutable_data<T>(out_dims, ctx.GetPlace());
-    batch_cell.set_lod(batch_gate->lod());
-    to_batch(device_ctx, *cell_out, batch_cell, false);
+    LoDTensor batch_hidden, batch_hidden_g, batch_cell;
+    ToBatch(device_ctx, *hidden_out, out_dims, batch_hidden);
+    ToBatch(device_ctx, *hidden_g, out_dims, batch_hidden_g);
+    ToBatch(device_ctx, *cell_out, out_dims, batch_cell);

-    LoDTensor batch_cell_g;
+    LoDTensor batch_cell_g, batch_gate_g;
     batch_cell_g.mutable_data<T>(out_dims, ctx.GetPlace());
     batch_cell_g.set_lod(batch_gate->lod());
     // TODO(qingqing) support the case output cell has gradient.
     // to_batch(device_ctx, *cell_g, batch_cell_g, false);
     zero(device_ctx, &batch_cell_g, static_cast<T>(0.0));
-
-    LoDTensor batch_gate_g;
     batch_gate_g.mutable_data<T>(batch_gate->dims(), ctx.GetPlace());
     batch_gate_g.set_lod(batch_gate->lod());
...
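Here the three repeated mutable_data / set_lod / to_batch triples become a ToBatch lambda that captures batch_gate and to_batch by reference. A compact sketch of the same capture-and-convert pattern, with ToyTensor and to_batch as stand-ins for LoDTensor and the LoDTensor2BatchFunctor call:

#include <cstddef>
#include <iostream>
#include <vector>

// Toy stand-ins, just to show the shape of the ToBatch refactor; none of
// this is Paddle's API.
struct ToyTensor {
  std::vector<float> data;
  std::vector<size_t> lod;
};

// Stand-in for the LoD-to-batch conversion.
void to_batch(const ToyTensor& src, ToyTensor& dst) { dst.data = src.data; }

int main() {
  ToyTensor hidden_out{{1, 2, 3}, {}}, hidden_g{{4, 5, 6}, {}};
  std::vector<size_t> batch_lod = {0, 2, 3};

  // As in the commit: one lambda allocates, attaches the shared LoD, and
  // converts, instead of repeating three statements per tensor.
  auto ToBatch = [&batch_lod](const ToyTensor& src, size_t size,
                              ToyTensor& dst) {
    dst.data.resize(size);  // mutable_data<T>(dims, place)
    dst.lod = batch_lod;    // set_lod(batch_gate->lod())
    to_batch(src, dst);     // to_batch(ctx, src, dst, false)
  };

  ToyTensor batch_hidden, batch_hidden_g;
  ToBatch(hidden_out, 3, batch_hidden);
  ToBatch(hidden_g, 3, batch_hidden_g);

  std::cout << batch_hidden.data[0] << " " << batch_hidden_g.data[0] << "\n";
  return 0;
}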
@@ -289,17 +303,8 @@ class LSTMGradKernel : public framework::OpKernel<T> {
         lstm_value.prevStateValue = cell_pre.data<T>();
         lstm_grad.prevStateGrad = cell_pre_g.data<T>();
       } else {
-        if (c0) {
-          lstm_value.prevStateValue = ordered_c0.data<T>();
-        } else {
-          lstm_value.prevStateValue = nullptr;
-        }
-        if (c0 && c0_g) {
-          ordered_c0_g.mutable_data<T>(c0_g->dims(), ctx.GetPlace());
-          lstm_grad.prevStateGrad = ordered_c0_g.data<T>();
-        } else {
-          lstm_grad.prevStateGrad = nullptr;
-        }
+        lstm_value.prevStateValue = c0 ? ordered_c0.data<T>() : nullptr;
+        lstm_grad.prevStateGrad = c0_g ? ordered_c0_g.data<T>() : nullptr;
       }

       int cur_batch_size = bend - bstart;
...
@@ -323,8 +328,7 @@ class LSTMGradKernel : public framework::OpKernel<T> {
         }
       } else {
         if (h0 && weight_g) {
-          ordered_h0.mutable_data<T>(h0->dims(), ctx.GetPlace());
-          row_shuffle(device_ctx, *h0, order, ordered_h0, true);
+          ReorderInitState<Place, T>(device_ctx, *h0, order, &ordered_h0, true);
           math::matmul<Place, T>(device_ctx, ordered_h0, true, gate_g, false,
                                  static_cast<T>(1.0), weight_g,
                                  static_cast<T>(1.0));
...
@@ -359,12 +363,10 @@ class LSTMGradKernel : public framework::OpKernel<T> {
     }

     if (h0 && h0_g) {
-      h0_g->mutable_data<T>(ctx.GetPlace());
-      row_shuffle(device_ctx, ordered_h0_g, order, *h0_g, false);
+      ReorderInitState<Place, T>(device_ctx, ordered_h0_g, order, h0_g, false);
     }
     if (c0 && c0_g) {
-      c0_g->mutable_data<T>(ctx.GetPlace());
-      row_shuffle(device_ctx, ordered_c0_g, order, *c0_g, false);
+      ReorderInitState<Place, T>(device_ctx, ordered_c0_g, order, c0_g, false);
     }
   }
 };
...
python/paddle/v2/framework/tests/test_lstm_op.py
...
@@ -179,36 +179,6 @@ class TestLstmOp(OpTest):
         self.check_grad(
             ['Input', 'Weight', 'Bias'], ['Hidden'], max_relative_error=5e-4)

-    def test_check_grad_ingore_bias(self):
-        N = len(self.lod[0]) - 1
-        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
-        self.outputs['BatchCellPreAct'] = np.zeros(
-            (N, self.D)).astype('float64')
-        self.check_grad(
-            ['Input', 'Weight'], ['Hidden'],
-            max_relative_error=5e-4,
-            no_grad_set=set('Bias'))
-
-    def test_check_grad_ingore_weight(self):
-        N = len(self.lod[0]) - 1
-        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
-        self.outputs['BatchCellPreAct'] = np.zeros(
-            (N, self.D)).astype('float64')
-        self.check_grad(
-            ['Input', 'Bias'], ['Hidden'],
-            max_relative_error=5e-4,
-            no_grad_set=set('Weight'))
-
-    def test_check_grad_ingore_input(self):
-        N = len(self.lod[0]) - 1
-        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
-        self.outputs['BatchCellPreAct'] = np.zeros(
-            (N, self.D)).astype('float64')
-        self.check_grad(
-            ['Weight', 'Bias'], ['Hidden'],
-            max_relative_error=5e-4,
-            no_grad_set=set('Input'))
-

 class TestLstmOpHasInitial(TestLstmOp):
     def set_argument(self):
...
@@ -233,15 +203,35 @@ class TestLstmOpHasInitial(TestLstmOp):
             ['Input', 'Weight', 'Bias', 'H0', 'C0'], ['Hidden'],
             max_relative_error=5e-4)

-    # In order to speed up, skip following testing
     def test_check_grad_ingore_bias(self):
-        return
+        N = len(self.lod[0]) - 1
+        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
+        self.outputs['BatchCellPreAct'] = np.zeros(
+            (N, self.D)).astype('float64')
+        self.check_grad(
+            ['Input', 'Weight'], ['Hidden'],
+            max_relative_error=5e-4,
+            no_grad_set=set('Bias'))

     def test_check_grad_ingore_weight(self):
-        return
+        N = len(self.lod[0]) - 1
+        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
+        self.outputs['BatchCellPreAct'] = np.zeros(
+            (N, self.D)).astype('float64')
+        self.check_grad(
+            ['Input', 'Bias'], ['Hidden'],
+            max_relative_error=5e-4,
+            no_grad_set=set('Weight'))

     def test_check_grad_ingore_input(self):
-        return
+        N = len(self.lod[0]) - 1
+        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
+        self.outputs['BatchCellPreAct'] = np.zeros(
+            (N, self.D)).astype('float64')
+        self.check_grad(
+            ['Weight', 'Bias'], ['Hidden'],
+            max_relative_error=5e-4,
+            no_grad_set=set('Input'))

     def test_check_grad_ingore_h0(self):
         N = len(self.lod[0]) - 1
...
@@ -277,16 +267,6 @@ class TestLstmOpRerverse(TestLstmOp):
         self.is_reverse = True
         self.use_peepholes = True

-    # In order to speed up, skip following testing
-    def test_check_grad_ingore_bias(self):
-        return
-
-    def test_check_grad_ingore_weight(self):
-        return
-
-    def test_check_grad_ingore_input(self):
-        return
-

 class TestLstmOpNotUsePeepholes(TestLstmOp):
     def set_argument(self):
...
@@ -301,16 +281,6 @@ class TestLstmOpNotUsePeepholes(TestLstmOp):
         self.is_reverse = True
         self.use_peepholes = False

-    # In order to speed up, skip following testing
-    def test_check_grad_ingore_bias(self):
-        return
-
-    def test_check_grad_ingore_weight(self):
-        return
-
-    def test_check_grad_ingore_input(self):
-        return
-

 if __name__ == '__main__':
     unittest.main()