Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
8ba8237a
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8ba8237a
编写于
11月 22, 2017
作者:
S
sweetsky0901
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into my_unpool_max_2d
上级
e2a5905e
53bd51e3
变更
10
隐藏空白更改
内联
并排
Showing
10 changed files
with
330 additions
and
73 deletions
+330
-73
paddle/operators/gru_unit_op.cc
paddle/operators/gru_unit_op.cc
+9
-14
paddle/operators/gru_unit_op.h
paddle/operators/gru_unit_op.h
+41
-35
paddle/operators/linear_chain_crf_op.h
paddle/operators/linear_chain_crf_op.h
+2
-2
python/paddle/v2/fluid/layer_helper.py
python/paddle/v2/fluid/layer_helper.py
+4
-1
python/paddle/v2/fluid/layers.py
python/paddle/v2/fluid/layers.py
+44
-1
python/paddle/v2/fluid/optimizer.py
python/paddle/v2/fluid/optimizer.py
+2
-1
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
...n/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
+192
-0
python/paddle/v2/fluid/tests/test_gru_unit_op.py
python/paddle/v2/fluid/tests/test_gru_unit_op.py
+10
-11
python/paddle/v2/fluid/tests/test_layers.py
python/paddle/v2/fluid/tests/test_layers.py
+25
-7
python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py
python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py
+1
-1
未找到文件。
paddle/operators/gru_unit_op.cc
浏览文件 @
8ba8237a
...
@@ -114,18 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -114,18 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
.
SetDefault
(
sigmoid
)
.
SetDefault
(
sigmoid
)
.
InEnum
({
identity
,
sigmoid
,
tanh
,
relu
});
.
InEnum
({
identity
,
sigmoid
,
tanh
,
relu
});
AddComment
(
R"DOC(
AddComment
(
R"DOC(
GRUUnit Operator.
GRUUnit Operator implements partial calculations of the GRU unit as following:
This operator implements partial calculations of the GRU unit as follows:
$$
$$
update \ gate: u_t = actGate(xu_t + W_u * h
idden_{prev} + bias
_u) \\
update \ gate: u_t = actGate(xu_t + W_u * h
_{t-1} + b
_u) \\
reset \ gate: r_t = actGate(xr_t + W_r * h
idden_{prev} + bias
_r) \\
reset \ gate: r_t = actGate(xr_t + W_r * h
_{t-1} + b
_r) \\
output \ candidate: {h}_t = actNode(
{xc}_t + W_c * dot(r_t, hidden_{prev}) + bias
_c) \\
output \ candidate: {h}_t = actNode(
xc_t + W_c * dot(r_t, h_{t-1}) + b
_c) \\
output: h_t = dot((1
-u_t), {h}_t) + dot(u_t, hidden_{prev}
)
output: h_t = dot((1
- u_t), h_{t-1}) + dot(u_t, {h}_t
)
$$
$$
The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
which is same as one time step of GRU Operator.
@note To implement the complete GRU unit, fully-connected operator must be
used before to feed xu, xr and xc as the Input of GRUUnit operator.
)DOC"
);
)DOC"
);
}
}
...
@@ -150,12 +151,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
...
@@ -150,12 +151,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
"ResetHiddenPrev"
);
"ResetHiddenPrev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Hidden"
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Hidden"
),
"Input(%s) of GRUUnitGradOp should not be null."
,
"Hidden"
);
"Input(%s) of GRUUnitGradOp should not be null."
,
"Hidden"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Gate"
)),
"Input(%s@GRAD) of GRUUnitGradOp should not be null."
,
"Gate"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"ResetHiddenPrev"
)),
"Input(%s@GRAD) of GRUUnitGradOp should not be null."
,
"ResetHiddenPrev"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Hidden"
)),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Hidden"
)),
"Input(%s@GRAD) of GRUUnitGradOp should not be null."
,
"Input(%s@GRAD) of GRUUnitGradOp should not be null."
,
"Hidden"
);
"Hidden"
);
...
...
paddle/operators/gru_unit_op.h
浏览文件 @
8ba8237a
...
@@ -110,7 +110,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
...
@@ -110,7 +110,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
auto
c
=
g
.
slice
(
c_offsets
,
extents
);
// output candidate
// calculate final output
// calculate final output
h
.
device
(
place
)
=
u
*
(
h_p
-
c
)
+
c
;
h
.
device
(
place
)
=
u
*
(
c
-
h_p
)
+
h_p
;
}
}
};
};
...
@@ -146,35 +146,27 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
...
@@ -146,35 +146,27 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
auto
*
weight_grad
=
auto
*
weight_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Weight"
));
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Weight"
));
auto
*
bias_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
auto
*
bias_grad
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Bias"
));
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
hidden_prev_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
weight_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
Tensor
gate_grad
;
Tensor
gate_grad
;
gate_grad
.
mutable_data
<
T
>
(
input
->
dims
(),
context
.
GetPlace
());
Tensor
reset_hidden_prev_grad
;
Tensor
reset_hidden_prev_grad
;
reset_hidden_prev_grad
.
mutable_data
<
T
>
(
reset_hidden_prev
->
dims
(),
context
.
GetPlace
());
int
batch_size
=
input
->
dims
()[
0
];
int
frame_size
=
hidden_prev
->
dims
()[
1
];
const
T
*
hidden_prev_data
=
hidden_prev
->
data
<
T
>
();
const
T
*
hidden_prev_data
=
hidden_prev
->
data
<
T
>
();
T
*
hidden_prev_grad_data
=
hidden_prev_grad
->
data
<
T
>
();
const
T
*
weight_data
=
weight
->
data
<
T
>
();
const
T
*
weight_data
=
weight
->
data
<
T
>
();
T
*
weight_grad_data
=
weight_grad
->
data
<
T
>
();
T
*
gate_grad_data
=
T
*
gate_grad_data
=
gate_grad
.
data
<
T
>
(
);
gate_grad
.
mutable_data
<
T
>
(
input
->
dims
(),
context
.
GetPlace
()
);
const
T
*
reset_hidden_prev_data
=
reset_hidden_prev
->
data
<
T
>
();
const
T
*
reset_hidden_prev_data
=
reset_hidden_prev
->
data
<
T
>
();
T
*
reset_hidden_prev_grad_data
=
reset_hidden_prev_grad
.
data
<
T
>
();
T
*
reset_hidden_prev_grad_data
=
reset_hidden_prev_grad
.
mutable_data
<
T
>
(
reset_hidden_prev
->
dims
(),
context
.
GetPlace
());
auto
h_p
=
EigenMatrix
<
T
>::
From
(
*
hidden_prev
);
auto
h_p
=
EigenMatrix
<
T
>::
From
(
*
hidden_prev
);
auto
g
=
EigenMatrix
<
T
>::
From
(
*
gate
);
auto
g
=
EigenMatrix
<
T
>::
From
(
*
gate
);
auto
d_h
=
EigenMatrix
<
T
>::
From
(
*
hidden_grad
);
auto
d_h
=
EigenMatrix
<
T
>::
From
(
*
hidden_grad
);
auto
d_x
=
EigenMatrix
<
T
>::
From
(
*
input_grad
);
auto
d_h_p
=
EigenMatrix
<
T
>::
From
(
*
hidden_prev_grad
);
auto
d_g
=
EigenMatrix
<
T
>::
From
(
gate_grad
);
auto
d_g
=
EigenMatrix
<
T
>::
From
(
gate_grad
);
auto
d_r_h_p
=
EigenMatrix
<
T
>::
From
(
reset_hidden_prev_grad
);
auto
d_r_h_p
=
EigenMatrix
<
T
>::
From
(
reset_hidden_prev_grad
);
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
auto
place
=
context
.
GetEigenDevice
<
Place
>
();
int
batch_size
=
input
->
dims
()[
0
];
int
frame_size
=
hidden_prev
->
dims
()[
1
];
Eigen
::
array
<
int
,
2
>
extents
({{
batch_size
,
frame_size
}});
Eigen
::
array
<
int
,
2
>
extents
({{
batch_size
,
frame_size
}});
Eigen
::
array
<
int
,
2
>
u_offsets
({{
0
,
0
}});
Eigen
::
array
<
int
,
2
>
u_offsets
({{
0
,
0
}});
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
auto
u
=
g
.
slice
(
u_offsets
,
extents
);
// update gate
...
@@ -185,38 +177,52 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
...
@@ -185,38 +177,52 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
// backward for unactivated update gate
// backward for unactivated update gate
ActGradCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
u
,
u
,
ActGradCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
u
,
u
,
d_g
.
slice
(
u_offsets
,
extents
),
d_h
*
(
h_p
-
c
));
d_g
.
slice
(
u_offsets
,
extents
),
d_h
*
(
c
-
h_p
));
// backward for unactivated output candidate
// backward for unactivated output candidate
ActGradCompute
(
context
.
Attr
<
int
>
(
"activation"
),
place
,
c
,
c
,
ActGradCompute
(
context
.
Attr
<
int
>
(
"activation"
),
place
,
c
,
c
,
d_g
.
slice
(
c_offsets
,
extents
),
d_h
*
(
u
.
constant
(
T
(
1
))
-
u
)
);
d_g
.
slice
(
c_offsets
,
extents
),
d_h
*
u
);
// backward for reset_hidden_prev
// backward for reset_hidden_prev
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
false
,
true
,
batch_size
,
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
false
,
true
,
batch_size
,
frame_size
,
frame_size
,
1
,
frame_size
,
frame_size
,
1
,
gate_grad_data
+
frame_size
*
2
,
frame_size
*
3
,
gate_grad_data
+
frame_size
*
2
,
frame_size
*
3
,
weight_data
+
frame_size
*
frame_size
*
2
,
frame_size
,
weight_data
+
frame_size
*
frame_size
*
2
,
frame_size
,
0
,
reset_hidden_prev_grad_data
,
frame_size
);
0
,
reset_hidden_prev_grad_data
,
frame_size
);
// backward for state_weight
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
true
,
false
,
frame_size
,
frame_size
,
batch_size
,
1
,
reset_hidden_prev_data
,
frame_size
,
gate_grad_data
+
frame_size
*
2
,
frame_size
*
3
,
0
,
weight_grad_data
+
frame_size
*
frame_size
*
2
,
frame_size
);
// backward for unactivated reset gate
// backward for unactivated reset gate
ActGradCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
r
,
r
,
ActGradCompute
(
context
.
Attr
<
int
>
(
"gate_activation"
),
place
,
r
,
r
,
d_g
.
slice
(
r_offsets
,
extents
),
d_r_h_p
*
h_p
);
d_g
.
slice
(
r_offsets
,
extents
),
d_r_h_p
*
h_p
);
// backward for update_gate_weight and reset_gate_weight
// backward for weight
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
true
,
false
,
frame_size
,
if
(
weight_grad
)
{
frame_size
*
2
,
batch_size
,
1
,
hidden_prev_data
,
T
*
weight_grad_data
=
weight_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
frame_size
,
gate_grad_data
,
frame_size
*
3
,
0
,
// backward for state_weight
weight_grad_data
,
frame_size
*
2
);
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
true
,
false
,
frame_size
,
frame_size
,
batch_size
,
1
,
reset_hidden_prev_data
,
frame_size
,
gate_grad_data
+
frame_size
*
2
,
frame_size
*
3
,
0
,
weight_grad_data
+
frame_size
*
frame_size
*
2
,
frame_size
);
// backward for update_gate_weight and reset_gate_weight
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
true
,
false
,
frame_size
,
frame_size
*
2
,
batch_size
,
1
,
hidden_prev_data
,
frame_size
,
gate_grad_data
,
frame_size
*
3
,
0
,
weight_grad_data
,
frame_size
*
2
);
}
// backward for hidden_prev
// backward for hidden_prev
d_h_p
.
device
(
place
)
=
d_r_h_p
*
r
+
d_h
*
u
;
if
(
hidden_prev_grad
)
{
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
false
,
true
,
batch_size
,
T
*
hidden_prev_grad_data
=
frame_size
,
frame_size
*
2
,
1
,
gate_grad_data
,
hidden_prev_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
frame_size
*
3
,
weight_data
,
frame_size
*
2
,
1
,
auto
d_h_p
=
EigenMatrix
<
T
>::
From
(
*
hidden_prev_grad
);
hidden_prev_grad_data
,
frame_size
);
d_h_p
.
device
(
place
)
=
d_r_h_p
*
r
+
d_h
*
(
u
.
constant
(
T
(
1
))
-
u
);
math
::
gemm
<
Place
,
T
>
(
context
.
device_context
(),
false
,
true
,
batch_size
,
frame_size
,
frame_size
*
2
,
1
,
gate_grad_data
,
frame_size
*
3
,
weight_data
,
frame_size
*
2
,
1
,
hidden_prev_grad_data
,
frame_size
);
}
// backward for input
// backward for input
d_x
.
device
(
place
)
=
d_g
;
if
(
input_grad
)
{
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
d_x
=
EigenMatrix
<
T
>::
From
(
*
input_grad
);
d_x
.
device
(
place
)
=
d_g
;
}
// backward for bias
// backward for bias
if
(
bias_grad
)
{
if
(
bias_grad
)
{
bias_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
bias_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
...
...
paddle/operators/linear_chain_crf_op.h
浏览文件 @
8ba8237a
...
@@ -271,7 +271,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
...
@@ -271,7 +271,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
ll
-=
std
::
log
(
sum
);
ll
-=
std
::
log
(
sum
);
// Now ll is equal to -log(Z).
// Now ll is equal to -log(Z).
const
int
*
lbl
=
label
.
data
<
in
t
>
();
const
int
64_t
*
lbl
=
label
.
data
<
int64_
t
>
();
PADDLE_ENFORCE_LT
(
PADDLE_ENFORCE_LT
(
static_cast
<
size_t
>
(
*
std
::
max_element
(
lbl
,
lbl
+
seq_length
)),
tag_num
,
static_cast
<
size_t
>
(
*
std
::
max_element
(
lbl
,
lbl
+
seq_length
)),
tag_num
,
"An invalid tag label that execesses the largest tag number."
);
"An invalid tag label that execesses the largest tag number."
);
...
@@ -449,7 +449,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
...
@@ -449,7 +449,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
Tensor
*
emission_grad
)
const
{
Tensor
*
emission_grad
)
const
{
const
T
*
w_exps
=
transition_exps
.
data
<
T
>
();
const
T
*
w_exps
=
transition_exps
.
data
<
T
>
();
const
T
*
x_exps
=
emission_exps
.
data
<
T
>
();
const
T
*
x_exps
=
emission_exps
.
data
<
T
>
();
const
int
*
label_value
=
label
.
data
<
in
t
>
();
const
int
64_t
*
label_value
=
label
.
data
<
int64_
t
>
();
T
*
beta_value
=
beta
->
data
<
T
>
();
T
*
beta_value
=
beta
->
data
<
T
>
();
auto
x_dims
=
emission_exps
.
dims
();
auto
x_dims
=
emission_exps
.
dims
();
...
...
python/paddle/v2/fluid/layer_helper.py
浏览文件 @
8ba8237a
...
@@ -126,7 +126,10 @@ class LayerHelper(object):
...
@@ -126,7 +126,10 @@ class LayerHelper(object):
self
.
startup_program
.
global_block
().
create_parameter
(
self
.
startup_program
.
global_block
().
create_parameter
(
dtype
=
dtype
,
shape
=
shape
,
**
attr_copy
)
dtype
=
dtype
,
shape
=
shape
,
**
attr_copy
)
return
self
.
main_program
.
global_block
().
create_parameter
(
return
self
.
main_program
.
global_block
().
create_parameter
(
name
=
attr_copy
[
'name'
],
dtype
=
dtype
,
shape
=
shape
)
name
=
attr_copy
[
'name'
],
dtype
=
dtype
,
shape
=
shape
,
trainable
=
attr_copy
.
get
(
'trainable'
,
True
))
def
create_tmp_variable
(
self
,
dtype
):
def
create_tmp_variable
(
self
,
dtype
):
return
self
.
main_program
.
current_block
().
create_var
(
return
self
.
main_program
.
current_block
().
create_var
(
...
...
python/paddle/v2/fluid/layers.py
浏览文件 @
8ba8237a
...
@@ -112,6 +112,7 @@ def fc(input,
...
@@ -112,6 +112,7 @@ def fc(input,
def
embedding
(
input
,
def
embedding
(
input
,
size
,
size
,
is_sparse
=
False
,
is_sparse
=
False
,
param_initializer
=
None
,
param_attr
=
None
,
param_attr
=
None
,
data_type
=
'float32'
,
data_type
=
'float32'
,
main_program
=
None
,
main_program
=
None
,
...
@@ -136,9 +137,16 @@ def embedding(input,
...
@@ -136,9 +137,16 @@ def embedding(input,
to the LayerHelper constructor.
to the LayerHelper constructor.
"""
"""
def
_get_default_param_initializer
():
return
XavierInitializer
()
helper
=
LayerHelper
(
'embedding'
,
**
locals
())
helper
=
LayerHelper
(
'embedding'
,
**
locals
())
w
=
helper
.
create_parameter
(
w
=
helper
.
create_parameter
(
attr
=
helper
.
param_attr
,
shape
=
size
,
dtype
=
data_type
)
attr
=
helper
.
param_attr
,
shape
=
size
,
dtype
=
data_type
,
initializer
=
param_initializer
or
_get_default_param_initializer
())
tmp
=
helper
.
create_tmp_variable
(
data_type
)
tmp
=
helper
.
create_tmp_variable
(
data_type
)
helper
.
append_op
(
helper
.
append_op
(
type
=
'lookup_table'
,
type
=
'lookup_table'
,
...
@@ -460,6 +468,41 @@ def sums(input, main_program=None, startup_program=None):
...
@@ -460,6 +468,41 @@ def sums(input, main_program=None, startup_program=None):
return
out
return
out
def linear_chain_crf(input,
                     label,
                     param_attr=None,
                     param_initializer=None,
                     main_program=None,
                     startup_program=None):
    """Append a `linear_chain_crf` operator and return its log-likelihood.

    A transition parameter of shape ``[size + 2, size]`` is created, where
    ``size`` is ``input.shape[1]``.
    NOTE(review): the two extra rows are presumably the start/end transition
    weights expected by the operator -- confirm against the op definition.

    Args:
        input: Variable fed to the operator as "Emission".
        label: Variable fed to the operator as "Label".
        param_attr: Attributes forwarded (through LayerHelper) to the
            transition parameter.
        param_initializer: Initializer for the transition parameter;
            ``XavierInitializer()`` is used when this is falsy.
        main_program: Forwarded to LayerHelper.
        startup_program: Forwarded to LayerHelper.

    Returns:
        The temporary variable bound to the operator's "LogLikelihood"
        output.
    """

    def _get_default_param_initializer():
        return XavierInitializer()

    # `locals()` is forwarded verbatim, so no extra local may be introduced
    # above this line without also changing what LayerHelper receives.
    helper = LayerHelper('linear_chain_crf', **locals())
    size = input.shape[1]
    transition = helper.create_parameter(
        attr=helper.param_attr,
        shape=[size + 2, size],
        dtype=helper.input_dtype(),
        initializer=param_initializer or _get_default_param_initializer())

    # Intermediate outputs of the operator; only the log-likelihood is
    # returned to the caller.
    alpha = helper.create_tmp_variable(dtype=helper.input_dtype())
    emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
    transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
    log_likelihood = helper.create_tmp_variable(dtype=helper.input_dtype())

    helper.append_op(
        type='linear_chain_crf',
        inputs={
            "Emission": [input],
            "Transition": transition,
            "Label": label
        },
        outputs={
            "Alpha": [alpha],
            "EmissionExps": [emission_exps],
            "TransitionExps": transition_exps,
            "LogLikelihood": log_likelihood
        })

    return log_likelihood
def
assign
(
input
,
output
,
main_program
=
None
,
startup_program
=
None
):
def
assign
(
input
,
output
,
main_program
=
None
,
startup_program
=
None
):
helper
=
LayerHelper
(
'assign'
,
**
locals
())
helper
=
LayerHelper
(
'assign'
,
**
locals
())
helper
.
append_op
(
helper
.
append_op
(
...
...
python/paddle/v2/fluid/optimizer.py
浏览文件 @
8ba8237a
...
@@ -170,7 +170,8 @@ class Optimizer(object):
...
@@ -170,7 +170,8 @@ class Optimizer(object):
optimize_ops
=
[]
optimize_ops
=
[]
for
param_and_grad
in
parameters_and_grads
:
for
param_and_grad
in
parameters_and_grads
:
if
param_and_grad
[
1
]
is
not
None
:
if
param_and_grad
[
0
].
trainable
is
True
and
param_and_grad
[
1
]
is
not
None
:
optimize_op
=
self
.
_append_optimize_op
(
loss
.
block
,
optimize_op
=
self
.
_append_optimize_op
(
loss
.
block
,
param_and_grad
)
param_and_grad
)
optimize_ops
.
append
(
optimize_op
)
optimize_ops
.
append
(
optimize_op
)
...
...
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
0 → 100644
浏览文件 @
8ba8237a
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.dataset.conll05
as
conll05
import
paddle.v2.fluid.core
as
core
import
paddle.v2.fluid.framework
as
framework
import
paddle.v2.fluid.layers
as
layers
from
paddle.v2.fluid.executor
import
Executor
,
g_scope
from
paddle.v2.fluid.optimizer
import
SGDOptimizer
# Vocabularies shipped with the CoNLL-05 dataset reader.
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

# Sizes used when building the network in db_lstm().
mark_dict_len = 2  # vocabulary size of the `mark` embedding
word_dim = 32  # width of the word and predicate embeddings
mark_dim = 5  # width of the `mark` embedding
hidden_dim = 512  # size of every fc / dynamic_lstm layer
depth = 8  # db_lstm() stacks layers for i in range(1, depth)
mix_hidden_lr = 1e-3  # passed as "learning_rate" of the CRF parameter

IS_SPARSE = True
PASS_NUM = 10
BATCH_SIZE = 20

# Name of the shared word-embedding parameter; main() overwrites its value
# with the embedding file returned by conll05.get_embedding().
embedding_name = 'emb'
def load_parameter(file_name, h, w):
    """Read a float32 matrix of shape ``(h, w)`` from a binary file.

    The first 16 bytes of the file are skipped as a header; everything after
    them is interpreted as ``float32`` values.

    Args:
        file_name: Path of the binary parameter file.
        h: Number of rows of the returned matrix.
        w: Number of columns of the returned matrix.

    Returns:
        A ``numpy.ndarray`` of dtype ``float32`` and shape ``(h, w)``.

    Raises:
        ValueError: raised by ``reshape`` when the payload does not contain
            exactly ``h * w`` values.
    """
    header_size = 16
    with open(file_name, 'rb') as f:
        f.read(header_size)  # skip header.
        values = np.fromfile(f, dtype=np.float32)
    return values.reshape(h, w)
def db_lstm():
    """Build the stacked LSTM feature network and return its output layer.

    Eight int64 data layers are declared, embedded, projected with `fc` and
    summed; the result feeds a stack of `dynamic_lstm` layers whose direction
    alternates with the layer index. The returned variable is the sum of two
    `fc` projections of size ``label_dict_len``.
    """
    # 8 features
    word = layers.data(name='word_data', shape=[1], data_type='int64')
    predicate = layers.data(name='verb_data', shape=[1], data_type='int64')
    ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], data_type='int64')
    ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], data_type='int64')
    ctx_0 = layers.data(name='ctx_0_data', shape=[1], data_type='int64')
    ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], data_type='int64')
    ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], data_type='int64')
    mark = layers.data(name='mark_data', shape=[1], data_type='int64')

    predicate_embedding = layers.embedding(
        input=predicate,
        size=[pred_len, word_dim],
        data_type='float32',
        is_sparse=IS_SPARSE,
        param_attr={'name': 'vemb'})

    mark_embedding = layers.embedding(
        input=mark,
        size=[mark_dict_len, mark_dim],
        data_type='float32',
        is_sparse=IS_SPARSE)

    # All six word-like inputs share one embedding parameter (same name),
    # which is marked as not trainable.
    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        layers.embedding(
            size=[word_dict_len, word_dim],
            input=x,
            param_attr={'name': embedding_name,
                        'trainable': False}) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0_layers = [
        layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
    ]

    hidden_0 = layers.sums(input=hidden_0_layers)

    lstm_0 = layers.dynamic_lstm(
        input=hidden_0,
        size=hidden_dim,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = layers.sums(input=[
            layers.fc(input=input_tmp[0], size=hidden_dim),
            layers.fc(input=input_tmp[1], size=hidden_dim)
        ])

        # Odd-numbered layers run over the sequence in reverse.
        lstm = layers.dynamic_lstm(
            input=mix_hidden,
            size=hidden_dim,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid',
            is_reverse=((i % 2) == 1))

        input_tmp = [mix_hidden, lstm]

    feature_out = layers.sums(input=[
        layers.fc(input=input_tmp[0], size=label_dict_len),
        layers.fc(input=input_tmp[1], size=label_dict_len)
    ])

    return feature_out
def to_lodtensor(data, place):
    """Pack a batch of sequences into a single `core.LoDTensor`.

    The sequences are concatenated along axis 0, cast to ``int64`` and
    reshaped to one column; the cumulative sequence lengths (starting at 0)
    are stored as the tensor's single LoD level.

    Args:
        data: Iterable of sequences (each must support ``len``).
        place: Device place handed to ``LoDTensor.set``.

    Returns:
        The populated `core.LoDTensor`.
    """
    # `data` is traversed twice below (lengths, then concatenation), so a
    # one-shot iterable such as a Python 3 `map` object must be materialised
    # first or the second pass would see nothing.
    data = list(data)

    cur_len = 0
    lod = [cur_len]
    for seq in data:
        cur_len += len(seq)
        lod.append(cur_len)

    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])

    res = core.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res
def main():
    """Build the network, load the word embedding and run the training loop.

    The loop stops the process with ``exit(0)`` right after the first
    reported batch so the script stays cheap enough for CI.
    """
    # define network topology
    feature_out = db_lstm()
    target = layers.data(name='target', shape=[1], data_type='int64')
    crf_cost = layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr={"name": 'crfw',
                    "learning_rate": mix_hidden_lr})
    avg_cost = layers.mean(x=crf_cost)
    # TODO(qiao)
    # 1. add crf_decode_layer and evaluator
    # 2. use other optimizer and check why out will be NAN
    sgd_optimizer = SGDOptimizer(learning_rate=0.0001)
    sgd_optimizer.minimize(avg_cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)
    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(framework.default_startup_program())

    # Overwrite the freshly initialised word embedding with the values from
    # the embedding file provided by the dataset module.
    embedding_param = g_scope.find_var(embedding_name).get_tensor()
    embedding_param.set(
        load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
        place)

    # Order of the columns inside every sample of a batch; the last one is
    # the label and is fed under the name of the `target` data layer.
    feed_names = [
        'word_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data',
        'ctx_p1_data', 'ctx_p2_data', 'verb_data', 'mark_data', 'target'
    ]

    def _column(batch, index):
        # A list (not `map`) so the helper works on Python 2 and 3 alike.
        return to_lodtensor([sample[index] for sample in batch], place)

    batch_id = 0
    for pass_id in range(PASS_NUM):
        for data in train_data():
            feed = dict((name, _column(data, index))
                        for index, name in enumerate(feed_names))

            outs = exe.run(framework.default_main_program(),
                           feed=feed,
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])

            if batch_id % 10 == 0:
                print("avg_cost=" + str(avg_cost_val))

                # exit early for CI
                # (batch_id is 0 on the first batch, so this fires there)
                exit(0)

            batch_id = batch_id + 1
# Run the example only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
python/paddle/v2/fluid/tests/test_gru_unit_op.py
浏览文件 @
8ba8237a
...
@@ -28,8 +28,8 @@ def relu(x):
...
@@ -28,8 +28,8 @@ def relu(x):
class
TestGRUUnitOp
(
OpTest
):
class
TestGRUUnitOp
(
OpTest
):
batch_size
=
3
batch_size
=
5
frame_size
=
5
frame_size
=
10
activate
=
{
activate
=
{
GRUActivationType
.
identity
:
identity
,
GRUActivationType
.
identity
:
identity
,
GRUActivationType
.
sigmoid
:
sigmoid
,
GRUActivationType
.
sigmoid
:
sigmoid
,
...
@@ -77,7 +77,7 @@ class TestGRUUnitOp(OpTest):
...
@@ -77,7 +77,7 @@ class TestGRUUnitOp(OpTest):
c
=
self
.
activate
[
self
.
attrs
[
'activation'
]](
np
.
dot
(
r_h_p
,
w_c
)
+
c
=
self
.
activate
[
self
.
attrs
[
'activation'
]](
np
.
dot
(
r_h_p
,
w_c
)
+
g
[:,
frame_size
*
2
:])
g
[:,
frame_size
*
2
:])
g
=
np
.
hstack
((
u_r
,
c
))
g
=
np
.
hstack
((
u_r
,
c
))
h
=
u
*
h_p
+
(
1
-
u
)
*
c
h
=
u
*
c
+
(
1
-
u
)
*
h_p
self
.
outputs
=
{
self
.
outputs
=
{
'Gate'
:
g
.
astype
(
'float64'
),
'Gate'
:
g
.
astype
(
'float64'
),
'ResetHiddenPrev'
:
r_h_p
.
astype
(
'float64'
),
'ResetHiddenPrev'
:
r_h_p
.
astype
(
'float64'
),
...
@@ -92,10 +92,7 @@ class TestGRUUnitOp(OpTest):
...
@@ -92,10 +92,7 @@ class TestGRUUnitOp(OpTest):
self
.
check_output
()
self
.
check_output
()
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
self
.
check_grad
(
self
.
check_grad
([
'Input'
,
'HiddenPrev'
,
'Weight'
],
[
'Hidden'
])
[
'Input'
,
'HiddenPrev'
,
'Weight'
],
[
'Hidden'
,
'ResetHiddenPrev'
,
'Gate'
],
max_relative_error
=
0.007
)
class
TestGRUUnitOpWithBias
(
TestGRUUnitOp
):
class
TestGRUUnitOpWithBias
(
TestGRUUnitOp
):
...
@@ -104,18 +101,20 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):
...
@@ -104,18 +101,20 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):
frame_size
=
self
.
frame_size
frame_size
=
self
.
frame_size
super
(
TestGRUUnitOpWithBias
,
self
).
set_inputs
()
super
(
TestGRUUnitOpWithBias
,
self
).
set_inputs
()
self
.
inputs
[
'Bias'
]
=
np
.
random
.
uniform
(
self
.
inputs
[
'Bias'
]
=
np
.
random
.
uniform
(
-
0.1
,
0.1
,
(
1
,
frame_size
*
3
)).
astype
(
'float
32
'
)
-
0.1
,
0.1
,
(
1
,
frame_size
*
3
)).
astype
(
'float
64
'
)
self
.
attrs
=
{
self
.
attrs
=
{
'activation'
:
GRUActivationType
.
identity
,
'activation'
:
GRUActivationType
.
identity
,
'gate_activation'
:
GRUActivationType
.
sigmoid
'gate_activation'
:
GRUActivationType
.
sigmoid
}
}
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
self
.
check_grad
([
'Input'
,
'HiddenPrev'
,
'Weight'
,
'Bias'
],
[
'Hidden'
])
def
test_check_grad_ingore_input
(
self
):
self
.
check_grad
(
self
.
check_grad
(
[
'
Input'
,
'
HiddenPrev'
,
'Weight'
,
'Bias'
],
[
'Hidden'
],
[
'HiddenPrev'
,
'Weight'
,
'Bias'
],
[
'Hidden'
],
max_relative_error
=
0.007
)
no_grad_set
=
set
(
'Input'
)
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
exit
(
0
)
# FIXME(yuyang18): This unittest is not pass. Fix it later
unittest
.
main
()
unittest
.
main
()
python/paddle/v2/fluid/tests/test_layers.py
浏览文件 @
8ba8237a
import
unittest
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.layers
as
layers
import
paddle.v2.fluid.nets
as
nets
import
paddle.v2.fluid.nets
as
nets
from
paddle.v2.fluid.framework
import
Program
from
paddle.v2.fluid.framework
import
Program
import
paddle.v2.fluid.core
as
core
import
unittest
class
TestBook
(
unittest
.
TestCase
):
class
TestBook
(
unittest
.
TestCase
):
...
@@ -20,7 +20,8 @@ class TestBook(unittest.TestCase):
...
@@ -20,7 +20,8 @@ class TestBook(unittest.TestCase):
avg_cost
=
layers
.
mean
(
x
=
cost
,
main_program
=
program
)
avg_cost
=
layers
.
mean
(
x
=
cost
,
main_program
=
program
)
self
.
assertIsNotNone
(
avg_cost
)
self
.
assertIsNotNone
(
avg_cost
)
program
.
append_backward
(
avg_cost
)
program
.
append_backward
(
avg_cost
)
print
str
(
program
)
# print str(program)
def
test_recognize_digits_mlp
(
self
):
def
test_recognize_digits_mlp
(
self
):
program
=
Program
()
program
=
Program
()
...
@@ -49,7 +50,7 @@ class TestBook(unittest.TestCase):
...
@@ -49,7 +50,7 @@ class TestBook(unittest.TestCase):
input
=
predict
,
label
=
label
,
main_program
=
program
)
input
=
predict
,
label
=
label
,
main_program
=
program
)
avg_cost
=
layers
.
mean
(
x
=
cost
,
main_program
=
program
)
avg_cost
=
layers
.
mean
(
x
=
cost
,
main_program
=
program
)
self
.
assertIsNotNone
(
avg_cost
)
self
.
assertIsNotNone
(
avg_cost
)
print
str
(
program
)
#
print str(program)
def
test_simple_conv2d
(
self
):
def
test_simple_conv2d
(
self
):
program
=
Program
()
program
=
Program
()
...
@@ -64,7 +65,7 @@ class TestBook(unittest.TestCase):
...
@@ -64,7 +65,7 @@ class TestBook(unittest.TestCase):
filter_size
=
[
4
,
4
],
filter_size
=
[
4
,
4
],
main_program
=
program
)
main_program
=
program
)
print
str
(
program
)
#
print str(program)
def
test_recognize_digits_conv
(
self
):
def
test_recognize_digits_conv
(
self
):
program
=
Program
()
program
=
Program
()
...
@@ -103,7 +104,7 @@ class TestBook(unittest.TestCase):
...
@@ -103,7 +104,7 @@ class TestBook(unittest.TestCase):
program
.
append_backward
(
avg_cost
)
program
.
append_backward
(
avg_cost
)
print
str
(
program
)
#
print str(program)
def
test_word_embedding
(
self
):
def
test_word_embedding
(
self
):
program
=
Program
()
program
=
Program
()
...
@@ -164,7 +165,24 @@ class TestBook(unittest.TestCase):
...
@@ -164,7 +165,24 @@ class TestBook(unittest.TestCase):
avg_cost
=
layers
.
mean
(
x
=
cost
,
main_program
=
program
)
avg_cost
=
layers
.
mean
(
x
=
cost
,
main_program
=
program
)
self
.
assertIsNotNone
(
avg_cost
)
self
.
assertIsNotNone
(
avg_cost
)
print
str
(
program
)
# print str(program)
def test_linear_chain_crf(self):
    """Check that a `linear_chain_crf` layer can be added to a program."""
    program = Program()

    # Every layer is given `main_program=program` explicitly, so nothing is
    # appended to the default program.
    images = layers.data(
        name='pixel',
        shape=[784],
        data_type='float32',
        main_program=program)
    # int64 matches the label type read by the linear_chain_crf kernel
    # (`label.data<int64_t>()`).
    label = layers.data(
        name='label', shape=[1], data_type='int64', main_program=program)
    hidden = layers.fc(input=images, size=128, main_program=program)
    crf = layers.linear_chain_crf(
        input=hidden, label=label, main_program=program)

    self.assertIsNotNone(crf)

    # print str(program)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py
浏览文件 @
8ba8237a
...
@@ -104,7 +104,7 @@ class TestLinearChainCrfOp(OpTest):
...
@@ -104,7 +104,7 @@ class TestLinearChainCrfOp(OpTest):
transition_exps
=
np
.
exp
(
transition
)
transition_exps
=
np
.
exp
(
transition
)
labels
=
np
.
random
.
randint
(
labels
=
np
.
random
.
randint
(
low
=
0
,
high
=
TAG_NUM
,
size
=
(
lod
[
-
1
][
-
1
],
1
),
dtype
=
"int
32
"
)
low
=
0
,
high
=
TAG_NUM
,
size
=
(
lod
[
-
1
][
-
1
],
1
),
dtype
=
"int
64
"
)
self
.
inputs
=
{
self
.
inputs
=
{
"Emission"
:
(
emission
,
lod
),
"Emission"
:
(
emission
,
lod
),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录