Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
7a5b8ffa
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7a5b8ffa
编写于
1月 23, 2018
作者:
Y
Yibing Liu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Pass grad checking for projection weight
上级
552c9012
变更
3
隐藏空白更改
内联
并排
Showing 3 changed files with 38 additions and 25 deletions
+38
-25
paddle/operators/lstmp_op.cc
paddle/operators/lstmp_op.cc
+2
-2
paddle/operators/lstmp_op.h
paddle/operators/lstmp_op.h
+9
-9
python/paddle/v2/fluid/tests/test_lstmp_op.py
python/paddle/v2/fluid/tests/test_lstmp_op.py
+27
-14
未找到文件。
paddle/operators/lstmp_op.cc
浏览文件 @
7a5b8ffa
...
...
@@ -217,7 +217,7 @@ class LSTMPOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment
(
R"DOC(
Long-Short Term Memory with Recurrent Projection (LSTMP) Operator.
LATMP is stand LSTM appended by a recurrent projection layer to reduce the
LSTMP is stand LSTM appended by a recurrent projection layer to reduce the
number of parameters, especially when the output size is relatively large.
The formula is as follows:
...
...
@@ -232,7 +232,7 @@ o_t = \sigma(W_{ox}x_{t} + W_{oh}r_{t-1} + W_{oc}c_t + b_o) \\
h_t = o_t \odot act_h(c_t)
r_t = act_h'(W_{rh}h_t)
r_t = act_{h'}(W_{rh}h_t)
$$
where the W terms denote weight matrices (e.g. $W_{xi}$ is the matrix
...
...
paddle/operators/lstmp_op.h
浏览文件 @
7a5b8ffa
...
...
@@ -365,10 +365,18 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
ActGradCompute
(
cell_act
,
place
,
cur_proj_dev
,
cur_proj_dev
,
proj_g_dev
,
proj_g_dev
);
}
/* hidden state backward */
Tensor
out_g
=
batch_hidden_g
.
Slice
(
bstart
,
bend
);
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
proj_g
,
false
,
*
proj_weight
,
true
,
static_cast
<
T
>
(
1.0
),
&
out_g
,
static_cast
<
T
>
(
0.0
));
/* projection weight backward*/
if
(
proj_weight_g
)
{
Tensor
hidden_t
=
batch_hidden
->
Slice
(
bstart
,
bend
);
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
hidden_t
,
true
,
proj_g
,
false
,
static_cast
<
T
>
(
1.0
),
proj_weight_g
,
static_cast
<
T
>
(
1.0
));
}
Tensor
gate
=
batch_gate
->
Slice
(
bstart
,
bend
);
Tensor
cell
=
batch_cell
.
Slice
(
bstart
,
bend
);
...
...
@@ -407,19 +415,12 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
static_cast
<
T
>
(
1.0
),
&
pre_proj_g
,
static_cast
<
T
>
(
1.0
));
if
(
weight_g
)
{
/*
backward weight
*/
/*
weight backward
*/
auto
pre_proj
=
batch_proj
.
Slice
(
pre_h_start
,
pre_h_end
);
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
pre_proj
,
true
,
gate_g
,
false
,
static_cast
<
T
>
(
1.0
),
weight_g
,
static_cast
<
T
>
(
1.0
));
}
if
(
proj_weight_g
)
{
/* backward proj weigh */
Tensor
hidden_t
=
batch_hidden
->
Slice
(
bstart
,
bend
);
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
hidden_t
,
true
,
proj_g
,
false
,
static_cast
<
T
>
(
1.0
),
proj_weight_g
,
static_cast
<
T
>
(
1.0
));
}
}
else
{
if
(
h0
&&
weight_g
)
{
ReorderInitState
<
DeviceContext
,
T
>
(
device_ctx
,
*
h0
,
order
,
...
...
@@ -444,7 +445,6 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
ActGradCompute
(
cell_act
,
place
,
proj0_dev
,
proj0_dev
,
proj0_g_dev
,
proj0_g_dev
);
}
// Tensor proj0_g = proj_g.Slice(bstart, bend);
if
(
h0_g
)
{
math
::
matmul
<
DeviceContext
,
T
>
(
device_ctx
,
proj0_g
,
false
,
*
proj_weight
,
true
,
...
...
python/paddle/v2/fluid/tests/test_lstmp_op.py
浏览文件 @
7a5b8ffa
...
...
@@ -207,8 +207,8 @@ class TestLstmOp(OpTest):
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
],
[
'Projection'
],
max_relative_error
=
5e-3
)
[
'Input'
,
'Weight'
,
'
ProjWeight'
,
'
Bias'
],
[
'Projection'
],
max_relative_error
=
1e-2
)
class
TestLstmOpHasInitial
(
TestLstmOp
):
...
...
@@ -235,8 +235,9 @@ class TestLstmOpHasInitial(TestLstmOp):
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
,
'H0'
,
'C0'
],
[
'Projection'
],
max_relative_error
=
5e-3
)
[
'Input'
,
'Weight'
,
'ProjWeight'
,
'Bias'
,
'H0'
,
'C0'
],
[
'Projection'
],
max_relative_error
=
1e-2
)
def
test_check_grad_ingore_bias
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
...
...
@@ -246,8 +247,8 @@ class TestLstmOpHasInitial(TestLstmOp):
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
],
[
'Projection'
],
max_relative_error
=
5e-3
,
[
'Input'
,
'
ProjWeight'
,
'
Weight'
],
[
'Projection'
],
max_relative_error
=
1e-2
,
no_grad_set
=
set
(
'Bias'
))
def
test_check_grad_ingore_weight
(
self
):
...
...
@@ -258,10 +259,22 @@ class TestLstmOpHasInitial(TestLstmOp):
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Bias'
],
[
'Projection'
],
max_relative_error
=
5e-3
,
[
'Input'
,
'
ProjWeight'
,
'
Bias'
],
[
'Projection'
],
max_relative_error
=
1e-2
,
no_grad_set
=
set
(
'Weight'
))
def
test_check_grad_ingore_proj_weight
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'OrderedP0'
]
=
np
.
zeros
((
N
,
self
.
P
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchGate'
]
=
np
.
zeros
((
N
,
4
*
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchHidden'
]
=
np
.
zeros
((
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
],
[
'Projection'
],
max_relative_error
=
1e-2
,
no_grad_set
=
set
(
'ProjWeight'
))
def
test_check_grad_ingore_input
(
self
):
N
=
len
(
self
.
lod
[
0
])
-
1
self
.
outputs
[
'OrderedP0'
]
=
np
.
zeros
((
N
,
self
.
P
)).
astype
(
'float64'
)
...
...
@@ -270,8 +283,8 @@ class TestLstmOpHasInitial(TestLstmOp):
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Weight'
,
'Bias'
],
[
'Projection'
],
max_relative_error
=
5e-3
,
[
'Weight'
,
'
ProjWeight'
,
'
Bias'
],
[
'Projection'
],
max_relative_error
=
1e-2
,
no_grad_set
=
set
(
'Input'
))
def
test_check_grad_ingore_h0
(
self
):
...
...
@@ -282,8 +295,8 @@ class TestLstmOpHasInitial(TestLstmOp):
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
,
'C0'
],
[
'Projection'
],
max_relative_error
=
5e-3
,
[
'Input'
,
'Weight'
,
'
ProjWeight'
,
'
Bias'
,
'C0'
],
[
'Projection'
],
max_relative_error
=
1e-2
,
no_grad_set
=
set
(
'H0'
))
def
test_check_grad_ingore_c0
(
self
):
...
...
@@ -294,8 +307,8 @@ class TestLstmOpHasInitial(TestLstmOp):
self
.
outputs
[
'BatchCellPreAct'
]
=
np
.
zeros
(
(
N
,
self
.
D
)).
astype
(
'float64'
)
self
.
check_grad
(
[
'Input'
,
'Weight'
,
'Bias'
,
'H0'
],
[
'Projection'
],
max_relative_error
=
5e-3
,
[
'Input'
,
'Weight'
,
'
ProjWeight'
,
'
Bias'
,
'H0'
],
[
'Projection'
],
max_relative_error
=
1e-2
,
no_grad_set
=
set
(
'C0'
))
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录