Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
d60fe75a
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 2 年 前同步成功
通知
708
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d60fe75a
编写于
11月 09, 2017
作者:
D
dangqingqing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
follow comments.
上级
4098ce73
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
83 addition
and
119 deletion
+83
-119
paddle/operators/lstm_op.cc
paddle/operators/lstm_op.cc
+11
-19
paddle/operators/lstm_op.h
paddle/operators/lstm_op.h
+48
-46
python/paddle/v2/framework/tests/test_lstm_op.py
python/paddle/v2/framework/tests/test_lstm_op.py
+24
-54
未找到文件。
paddle/operators/lstm_op.cc
浏览文件 @
d60fe75a
...
...
@@ -246,25 +246,17 @@ class LSTMGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"BatchCellPreAct"
),
"Input(BatchGate) of LSTM should not be null."
);
auto
in_g_name
=
framework
::
GradVarName
(
"Input"
);
if
(
ctx
->
HasOutput
(
in_g_name
))
ctx
->
SetOutputDim
(
in_g_name
,
ctx
->
GetInputDim
(
"Input"
));
auto
w_g_name
=
framework
::
GradVarName
(
"Weight"
);
if
(
ctx
->
HasOutput
(
w_g_name
))
ctx
->
SetOutputDim
(
w_g_name
,
ctx
->
GetInputDim
(
"Weight"
));
auto
b_g_name
=
framework
::
GradVarName
(
"Bias"
);
if
(
ctx
->
HasOutput
(
b_g_name
))
ctx
->
SetOutputDim
(
b_g_name
,
ctx
->
GetInputDim
(
"Bias"
));
auto
h0_g_name
=
framework
::
GradVarName
(
"H0"
);
if
(
ctx
->
HasOutput
(
h0_g_name
))
ctx
->
SetOutputDim
(
h0_g_name
,
ctx
->
GetInputDim
(
"H0"
));
auto
c0_g_name
=
framework
::
GradVarName
(
"C0"
);
if
(
ctx
->
HasOutput
(
c0_g_name
))
ctx
->
SetOutputDim
(
c0_g_name
,
ctx
->
GetInputDim
(
"C0"
));
auto
SetOutGradDim
=
[
&
ctx
](
const
std
::
string
&
name
)
{
auto
g_name
=
framework
::
GradVarName
(
name
);
if
(
ctx
->
HasOutput
(
g_name
))
ctx
->
SetOutputDim
(
g_name
,
ctx
->
GetInputDim
(
name
));
};
SetOutGradDim
(
"Input"
);
SetOutGradDim
(
"Weight"
);
SetOutGradDim
(
"Bias"
);
SetOutGradDim
(
"H0"
);
SetOutGradDim
(
"C0"
);
}
protected:
...
...
paddle/operators/lstm_op.h
浏览文件 @
d60fe75a
...
...
@@ -28,6 +28,15 @@ template <typename T, int MajorType = Eigen::RowMajor,
typename
IndexType
=
Eigen
::
DenseIndex
>
using
EigenMatrix
=
framework
::
EigenMatrix
<
T
,
MajorType
,
IndexType
>
;
// Copies the rows of `src` into `dst`, shuffled according to `index`.
//
// The LSTM batch computation reorders the input sequences by length, so the
// initial hidden/cell states (and their gradients) have to be reordered with
// the same row order before they can be combined with the batched data.
//
//   ctx          device context; supplies the place used to allocate `dst`
//                and is forwarded to the row-copy functor.
//   src          tensor whose rows are read.
//   index        row order handed to math::CopyMatrixRowsFunctor.
//   dst          output tensor; allocated here with the dimensions of `src`.
//   indexed_src  forwarded unchanged to the functor. Presumably selects
//                whether `index` addresses rows of `src` (true) or rows of
//                `dst` (false) -- TODO confirm against CopyMatrixRowsFunctor.
template <typename Place, typename T>
inline void ReorderInitState(const platform::DeviceContext& ctx,
                             const framework::Tensor& src, const size_t* index,
                             framework::Tensor* dst, bool indexed_src) {
  // Allocate the destination first so the functor writes into valid storage.
  dst->mutable_data<T>(src.dims(), ctx.GetPlace());

  math::CopyMatrixRowsFunctor<Place, T> copy_rows;
  copy_rows(ctx, src, index, *dst, indexed_src);
}
template
<
typename
Place
,
typename
T
>
class
LSTMKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -83,11 +92,13 @@ class LSTMKernel : public framework::OpKernel<T> {
}
lstm_value
.
prevStateValue
=
nullptr
;
Tensor
ordered_c0
;
if
(
cell_t0
)
{
math
::
CopyMatrixRowsFunctor
<
Place
,
T
>
row_shuffle
;
ordered_c0
.
mutable_data
<
T
>
(
cell_t0
->
dims
(),
ctx
.
GetPlace
());
const
size_t
*
order
=
batch_gate
->
lod
()[
2
].
data
();
row_shuffle
(
device_ctx
,
*
cell_t0
,
order
,
ordered_c0
,
true
);
if
(
cell_t0
)
{
// The batch computation for LSTM reorders the input sequences by
// length, so the initial cell state must be reordered to match.
ReorderInitState
<
Place
,
T
>
(
device_ctx
,
*
cell_t0
,
order
,
&
ordered_c0
,
true
);
lstm_value
.
prevStateValue
=
ordered_c0
.
data
<
T
>
();
}
...
...
@@ -123,11 +134,16 @@ class LSTMKernel : public framework::OpKernel<T> {
static_cast
<
T
>
(
1.0
),
&
gate_t
,
static_cast
<
T
>
(
1.0
));
}
else
if
(
hidden_t0
)
{
math
::
CopyMatrixRowsFunctor
<
Place
,
T
>
row_shuffle
;
// If n == 0 and there is no initial hidden state, i.e. H0 is all zeros,
// the calculation W_h * H0 is skipped.
// If n == 0 and there is an initial hidden state, calculate W_h * H0.
// The batch computation for LSTM reorders the input sequences by
// length, so the initial hidden state must be reordered to match.
Tensor
ordered_h0
;
ordered_h0
.
mutable_data
<
T
>
(
hidden_t0
->
dims
(),
ctx
.
GetPlace
());
const
size_t
*
order
=
batch_gate
->
lod
()[
2
].
data
();
row_shuffle
(
device_ctx
,
*
hidden_t0
,
order
,
ordered_h0
,
true
);
ReorderInitState
<
Place
,
T
>
(
device_ctx
,
*
hidden_t0
,
order
,
&
ordered_h0
,
true
);
math
::
matmul
<
Place
,
T
>
(
device_ctx
,
ordered_h0
,
false
,
*
weight
,
false
,
static_cast
<
T
>
(
1.0
),
&
gate_t
,
static_cast
<
T
>
(
1.0
));
...
...
@@ -187,12 +203,16 @@ class LSTMGradKernel : public framework::OpKernel<T> {
zero
(
device_ctx
,
weight_g
,
static_cast
<
T
>
(
0.0
));
}
// ordered_h0/c0 is the reordered hidden/cell initialization.
// ordered_h0_g/c0_g is the reordered gradient of hidden/cell
// initialization.
Tensor
ordered_h0
,
ordered_c0
,
ordered_h0_g
,
ordered_c0_g
;
math
::
CopyMatrixRowsFunctor
<
Place
,
T
>
row_shuffle
;
const
size_t
*
order
=
batch_gate
->
lod
()[
2
].
data
();
if
(
c0
)
{
ordered_c0
.
mutable_data
<
T
>
(
c0
->
dims
(),
ctx
.
GetPlace
());
row_shuffle
(
device_ctx
,
*
c0
,
order
,
ordered_c0
,
true
);
ReorderInitState
<
Place
,
T
>
(
device_ctx
,
*
c0
,
order
,
&
ordered_c0
,
true
);
}
if
(
c0
&&
c0_g
)
{
ordered_c0_g
.
mutable_data
<
T
>
(
c0_g
->
dims
(),
ctx
.
GetPlace
());
}
auto
in_dims
=
input
->
dims
();
...
...
@@ -231,30 +251,24 @@ class LSTMGradKernel : public framework::OpKernel<T> {
math
::
LoDTensor2BatchFunctor
<
Place
,
T
>
to_batch
;
// use the local variable as here.
LoDTensor
batch_hidden
;
batch_hidden
.
mutable_data
<
T
>
(
out_dims
,
ctx
.
GetPlace
());
batch_hidden
.
set_lod
(
batch_gate
->
lod
());
to_batch
(
device_ctx
,
*
hidden_out
,
batch_hidden
,
false
);
LoDTensor
batch_hidden_g
;
batch_hidden_g
.
mutable_data
<
T
>
(
out_dims
,
ctx
.
GetPlace
());
batch_hidden_g
.
set_lod
(
batch_gate
->
lod
());
to_batch
(
device_ctx
,
*
hidden_g
,
batch_hidden_g
,
false
);
auto
ToBatch
=
[
&
batch_gate
,
&
to_batch
](
const
platform
::
DeviceContext
&
ctx
,
const
framework
::
LoDTensor
&
src
,
const
framework
::
DDim
&
dims
,
framework
::
LoDTensor
&
dst
)
{
dst
.
mutable_data
<
T
>
(
dims
,
ctx
.
GetPlace
());
dst
.
set_lod
(
batch_gate
->
lod
());
to_batch
(
ctx
,
src
,
dst
,
false
);
};
LoDTensor
batch_cell
;
batch_cell
.
mutable_data
<
T
>
(
out_dims
,
ctx
.
GetPlace
()
);
batch_cell
.
set_lod
(
batch_gate
->
lod
()
);
to_batch
(
device_ctx
,
*
cell_out
,
batch_cell
,
false
);
LoDTensor
batch_
hidden
,
batch_hidden_g
,
batch_
cell
;
ToBatch
(
device_ctx
,
*
hidden_out
,
out_dims
,
batch_hidden
);
ToBatch
(
device_ctx
,
*
hidden_g
,
out_dims
,
batch_hidden_g
);
ToBatch
(
device_ctx
,
*
cell_out
,
out_dims
,
batch_cell
);
LoDTensor
batch_cell_g
;
LoDTensor
batch_cell_g
,
batch_gate_g
;
batch_cell_g
.
mutable_data
<
T
>
(
out_dims
,
ctx
.
GetPlace
());
batch_cell_g
.
set_lod
(
batch_gate
->
lod
());
// TODO(qingqing) support the case output cell has gradient.
// to_batch(device_ctx, *cell_g, batch_cell_g, false);
zero
(
device_ctx
,
&
batch_cell_g
,
static_cast
<
T
>
(
0.0
));
LoDTensor
batch_gate_g
;
batch_gate_g
.
mutable_data
<
T
>
(
batch_gate
->
dims
(),
ctx
.
GetPlace
());
batch_gate_g
.
set_lod
(
batch_gate
->
lod
());
...
...
@@ -289,17 +303,8 @@ class LSTMGradKernel : public framework::OpKernel<T> {
lstm_value
.
prevStateValue
=
cell_pre
.
data
<
T
>
();
lstm_grad
.
prevStateGrad
=
cell_pre_g
.
data
<
T
>
();
}
else
{
if
(
c0
)
{
lstm_value
.
prevStateValue
=
ordered_c0
.
data
<
T
>
();
}
else
{
lstm_value
.
prevStateValue
=
nullptr
;
}
if
(
c0
&&
c0_g
)
{
ordered_c0_g
.
mutable_data
<
T
>
(
c0_g
->
dims
(),
ctx
.
GetPlace
());
lstm_grad
.
prevStateGrad
=
ordered_c0_g
.
data
<
T
>
();
}
else
{
lstm_grad
.
prevStateGrad
=
nullptr
;
}
lstm_value
.
prevStateValue
=
c0
?
ordered_c0
.
data
<
T
>
()
:
nullptr
;
lstm_grad
.
prevStateGrad
=
c0_g
?
ordered_c0_g
.
data
<
T
>
()
:
nullptr
;
}
int
cur_batch_size
=
bend
-
bstart
;
...
...
@@ -323,8 +328,7 @@ class LSTMGradKernel : public framework::OpKernel<T> {
}
}
else
{
if
(
h0
&&
weight_g
)
{
ordered_h0
.
mutable_data
<
T
>
(
h0
->
dims
(),
ctx
.
GetPlace
());
row_shuffle
(
device_ctx
,
*
h0
,
order
,
ordered_h0
,
true
);
ReorderInitState
<
Place
,
T
>
(
device_ctx
,
*
h0
,
order
,
&
ordered_h0
,
true
);
math
::
matmul
<
Place
,
T
>
(
device_ctx
,
ordered_h0
,
true
,
gate_g
,
false
,
static_cast
<
T
>
(
1.0
),
weight_g
,
static_cast
<
T
>
(
1.0
));
...
...
@@ -359,12 +363,10 @@ class LSTMGradKernel : public framework::OpKernel<T> {
}
if
(
h0
&&
h0_g
)
{
h0_g
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
row_shuffle
(
device_ctx
,
ordered_h0_g
,
order
,
*
h0_g
,
false
);
ReorderInitState
<
Place
,
T
>
(
device_ctx
,
ordered_h0_g
,
order
,
h0_g
,
false
);
}
if
(
c0
&&
c0_g
)
{
c0_g
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
row_shuffle
(
device_ctx
,
ordered_c0_g
,
order
,
*
c0_g
,
false
);
ReorderInitState
<
Place
,
T
>
(
device_ctx
,
ordered_c0_g
,
order
,
c0_g
,
false
);
}
}
};
...
...
python/paddle/v2/framework/tests/test_lstm_op.py
浏览文件 @
d60fe75a
...
...
@@ -179,36 +179,6 @@ class TestLstmOp(OpTest):
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
5e-4
)
def
test_check_grad_ingore_bias
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Bias'
))
def
test_check_grad_ingore_weight
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Weight'
))
def
test_check_grad_ingore_input
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Weight'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Input'
))
class
TestLstmOpHasInitial
(
TestLstmOp
):
def
set_argument
(
self
):
...
...
@@ -233,15 +203,35 @@ class TestLstmOpHasInitial(TestLstmOp):
[
'Input'
,
'Weight'
,
'Bias'
,
'H0'
,
'C0'
],
[
'Hidden'
],
max_relative_error
=
5e-4
)
# In order to speed up, skip following testing
def
test_check_grad_ingore_bias
(
self
):
return
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Bias'
))
def
test_check_grad_ingore_weight
(
self
):
return
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Weight'
))
def
test_check_grad_ingore_input
(
self
):
return
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Weight'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
5e-4
,
no_grad_set
=
set
(
'Input'
))
def
test_check_grad_ingore_h0
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
...
...
@@ -277,16 +267,6 @@ class TestLstmOpRerverse(TestLstmOp):
self
.
is_reverse
=
True
self
.
use_peepholes
=
True
# In order to speed up, skip following testing
def
test_check_grad_ingore_bias
(
self
):
return
def
test_check_grad_ingore_weight
(
self
):
return
def
test_check_grad_ingore_input
(
self
):
return
class
TestLstmOpNotUsePeepholes
(
TestLstmOp
):
def
set_argument
(
self
):
...
...
@@ -301,16 +281,6 @@ class TestLstmOpNotUsePeepholes(TestLstmOp):
self
.
is_reverse
=
True
self
.
use_peepholes
=
False
# In order to speed up, skip following testing
def
test_check_grad_ingore_bias
(
self
):
return
def
test_check_grad_ingore_weight
(
self
):
return
def
test_check_grad_ingore_input
(
self
):
return
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录