BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 8ba8237a
Authored Nov 22, 2017 by sweetsky0901

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into my_unpool_max_2d

Parents: e2a5905e, 53bd51e3

Showing 10 changed files with 330 additions and 73 deletions (+330 -73).
paddle/operators/gru_unit_op.cc                                   +9   -14
paddle/operators/gru_unit_op.h                                    +41  -35
paddle/operators/linear_chain_crf_op.h                            +2   -2
python/paddle/v2/fluid/layer_helper.py                            +4   -1
python/paddle/v2/fluid/layers.py                                  +44  -1
python/paddle/v2/fluid/optimizer.py                               +2   -1
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py    +192 -0
python/paddle/v2/fluid/tests/test_gru_unit_op.py                  +10  -11
python/paddle/v2/fluid/tests/test_layers.py                       +25  -7
python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py          +1   -1
paddle/operators/gru_unit_op.cc — view file @ 8ba8237a

@@ -114,18 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault(sigmoid)
         .InEnum({identity, sigmoid, tanh, relu});
     AddComment(R"DOC(
-GRUUnit Operator.
-
-This operator implements partial calculations of the GRU unit as follows:
+GRUUnit Operator implements partial calculations of the GRU unit as following:
 
 $$
-update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\
-reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\
-output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\
-output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev})
+update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\
+reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\
+output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\
+output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t)
 $$
 
-The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp.
+which is same as one time step of GRU Operator.
+
+@note To implement the complete GRU unit, fully-connected operator must be
+used before to feed xu, xr and xc as the Input of GRUUnit operator.
 
 )DOC");
   }

@@ -150,12 +151,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel {
                    "ResetHiddenPrev");
     PADDLE_ENFORCE(ctx->HasInput("Hidden"),
                    "Input(%s) of GRUUnitGradOp should not be null.", "Hidden");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")),
-                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
-                   "Gate");
-    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")),
-                   "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
-                   "ResetHiddenPrev");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")),
                    "Input(%s@GRAD) of GRUUnitGradOp should not be null.",
                    "Hidden");
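The corrected doc formula above matches the kernel fix in gru_unit_op.h below, where the output is computed as u * (c - h_p) + h_p. A quick NumPy check of that equivalence (not part of the commit; the array shapes and values here are made up):

```python
import numpy as np

rng = np.random.RandomState(0)
u = rng.uniform(size=(3, 5))    # update gate u_t
c = rng.uniform(size=(3, 5))    # output candidate {h}_t
h_p = rng.uniform(size=(3, 5))  # previous hidden state h_{t-1}

# Kernel expression after the fix: h = u * (c - h_p) + h_p
h_kernel = u * (c - h_p) + h_p
# Documented formula after the fix: h_t = (1 - u_t) * h_{t-1} + u_t * {h}_t
h_doc = (1 - u) * h_p + u * c

assert np.allclose(h_kernel, h_doc)
```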
paddle/operators/gru_unit_op.h — view file @ 8ba8237a

@@ -110,7 +110,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
     auto c = g.slice(c_offsets, extents);  // output candidate
 
     // calculate final output
-    h.device(place) = u * (h_p - c) + c;
+    h.device(place) = u * (c - h_p) + h_p;
   }
 };

@@ -146,35 +146,27 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
     auto* weight_grad =
         context.Output<Tensor>(framework::GradVarName("Weight"));
     auto* bias_grad = context.Output<Tensor>(framework::GradVarName("Bias"));
-    input_grad->mutable_data<T>(context.GetPlace());
-    hidden_prev_grad->mutable_data<T>(context.GetPlace());
-    weight_grad->mutable_data<T>(context.GetPlace());
     Tensor gate_grad;
-    gate_grad.mutable_data<T>(input->dims(), context.GetPlace());
     Tensor reset_hidden_prev_grad;
-    reset_hidden_prev_grad.mutable_data<T>(reset_hidden_prev->dims(),
-                                           context.GetPlace());
-
-    int batch_size = input->dims()[0];
-    int frame_size = hidden_prev->dims()[1];
-
     const T* hidden_prev_data = hidden_prev->data<T>();
-    T* hidden_prev_grad_data = hidden_prev_grad->data<T>();
     const T* weight_data = weight->data<T>();
-    T* weight_grad_data = weight_grad->data<T>();
-    T* gate_grad_data = gate_grad.data<T>();
+    T* gate_grad_data =
+        gate_grad.mutable_data<T>(input->dims(), context.GetPlace());
     const T* reset_hidden_prev_data = reset_hidden_prev->data<T>();
-    T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.data<T>();
+    T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.mutable_data<T>(
+        reset_hidden_prev->dims(), context.GetPlace());
     auto h_p = EigenMatrix<T>::From(*hidden_prev);
     auto g = EigenMatrix<T>::From(*gate);
     auto d_h = EigenMatrix<T>::From(*hidden_grad);
-    auto d_x = EigenMatrix<T>::From(*input_grad);
-    auto d_h_p = EigenMatrix<T>::From(*hidden_prev_grad);
     auto d_g = EigenMatrix<T>::From(gate_grad);
     auto d_r_h_p = EigenMatrix<T>::From(reset_hidden_prev_grad);
     auto place = context.GetEigenDevice<Place>();
+    int batch_size = input->dims()[0];
+    int frame_size = hidden_prev->dims()[1];
 
     Eigen::array<int, 2> extents({{batch_size, frame_size}});
     Eigen::array<int, 2> u_offsets({{0, 0}});
     auto u = g.slice(u_offsets, extents);  // update gate

@@ -185,38 +177,52 @@ class GRUUnitGradKernel : public framework::OpKernel<T> {
     // backward for unactivated update gate
     ActGradCompute(context.Attr<int>("gate_activation"), place, u, u,
-                   d_g.slice(u_offsets, extents), d_h * (h_p - c));
+                   d_g.slice(u_offsets, extents), d_h * (c - h_p));
     // backward for unactivated output candidate
     ActGradCompute(context.Attr<int>("activation"), place, c, c,
-                   d_g.slice(c_offsets, extents),
-                   d_h * (u.constant(T(1)) - u));
+                   d_g.slice(c_offsets, extents), d_h * u);
     // backward for reset_hidden_prev
     math::gemm<Place, T>(context.device_context(), false, true, batch_size,
                          frame_size, frame_size, 1,
                          gate_grad_data + frame_size * 2, frame_size * 3,
                          weight_data + frame_size * frame_size * 2, frame_size,
                          0, reset_hidden_prev_grad_data, frame_size);
-    // backward for state_weight
-    math::gemm<Place, T>(context.device_context(), true, false, frame_size,
-                         frame_size, batch_size, 1, reset_hidden_prev_data,
-                         frame_size, gate_grad_data + frame_size * 2,
-                         frame_size * 3, 0,
-                         weight_grad_data + frame_size * frame_size * 2,
-                         frame_size);
     // backward for unactivated reset gate
     ActGradCompute(context.Attr<int>("gate_activation"), place, r, r,
                    d_g.slice(r_offsets, extents), d_r_h_p * h_p);
-    // backward for update_gate_weight and reset_gate_weight
-    math::gemm<Place, T>(context.device_context(), true, false, frame_size,
-                         frame_size * 2, batch_size, 1, hidden_prev_data,
-                         frame_size, gate_grad_data, frame_size * 3, 0,
-                         weight_grad_data, frame_size * 2);
+    // backward for weight
+    if (weight_grad) {
+      T* weight_grad_data = weight_grad->mutable_data<T>(context.GetPlace());
+      // backward for state_weight
+      math::gemm<Place, T>(
+          context.device_context(), true, false, frame_size, frame_size,
+          batch_size, 1, reset_hidden_prev_data, frame_size,
+          gate_grad_data + frame_size * 2, frame_size * 3, 0,
+          weight_grad_data + frame_size * frame_size * 2, frame_size);
+
+      // backward for update_gate_weight and reset_gate_weight
+      math::gemm<Place, T>(context.device_context(), true, false, frame_size,
+                           frame_size * 2, batch_size, 1, hidden_prev_data,
+                           frame_size, gate_grad_data, frame_size * 3, 0,
+                           weight_grad_data, frame_size * 2);
+    }
     // backward for hidden_prev
-    d_h_p.device(place) = d_r_h_p * r + d_h * u;
-    math::gemm<Place, T>(context.device_context(), false, true, batch_size,
-                         frame_size, frame_size * 2, 1, gate_grad_data,
-                         frame_size * 3, weight_data, frame_size * 2, 1,
-                         hidden_prev_grad_data, frame_size);
+    if (hidden_prev_grad) {
+      T* hidden_prev_grad_data =
+          hidden_prev_grad->mutable_data<T>(context.GetPlace());
+      auto d_h_p = EigenMatrix<T>::From(*hidden_prev_grad);
+      d_h_p.device(place) = d_r_h_p * r + d_h * (u.constant(T(1)) - u);
+      math::gemm<Place, T>(context.device_context(), false, true, batch_size,
+                           frame_size, frame_size * 2, 1, gate_grad_data,
+                           frame_size * 3, weight_data, frame_size * 2, 1,
+                           hidden_prev_grad_data, frame_size);
+    }
     // backward for input
-    d_x.device(place) = d_g;
+    if (input_grad) {
+      input_grad->mutable_data<T>(context.GetPlace());
+      auto d_x = EigenMatrix<T>::From(*input_grad);
+      d_x.device(place) = d_g;
+    }
     // backward for bias
     if (bias_grad) {
       bias_grad->mutable_data<T>(context.GetPlace());
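For intuition only (not part of the commit), a small NumPy sketch checking the corrected local gradients used above: with h = u * (c - h_p) + h_p, the partials are dh/du = (c - h_p), dh/dc = u, and dh/dh_p = 1 - u, which is exactly what the rewritten ActGradCompute arguments and the new d_h_p expression propagate.

```python
import numpy as np

rng = np.random.RandomState(1)
u, c, h_p = rng.uniform(size=3)  # scalar case is enough to check the algebra
eps = 1e-6

h = lambda u, c, h_p: u * (c - h_p) + h_p  # forward expression from the kernel

# Analytic partials used by the gradient kernel after the fix.
dh_du, dh_dc, dh_dhp = c - h_p, u, 1.0 - u

# Finite-difference checks.
assert abs((h(u + eps, c, h_p) - h(u, c, h_p)) / eps - dh_du) < 1e-4
assert abs((h(u, c + eps, h_p) - h(u, c, h_p)) / eps - dh_dc) < 1e-4
assert abs((h(u, c, h_p + eps) - h(u, c, h_p)) / eps - dh_dhp) < 1e-4
```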
paddle/operators/linear_chain_crf_op.h — view file @ 8ba8237a

@@ -271,7 +271,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
     ll -= std::log(sum);
     // Now ll is equal to -log(Z).
 
-    const int* lbl = label.data<int>();
+    const int64_t* lbl = label.data<int64_t>();
     PADDLE_ENFORCE_LT(
         static_cast<size_t>(*std::max_element(lbl, lbl + seq_length)), tag_num,
         "An invalid tag label that execesses the largest tag number.");

@@ -449,7 +449,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
                              Tensor* emission_grad) const {
     const T* w_exps = transition_exps.data<T>();
     const T* x_exps = emission_exps.data<T>();
-    const int* label_value = label.data<int>();
+    const int64_t* label_value = label.data<int64_t>();
     T* beta_value = beta->data<T>();
     auto x_dims = emission_exps.dims();
python/paddle/v2/fluid/layer_helper.py — view file @ 8ba8237a

@@ -126,7 +126,10 @@ class LayerHelper(object):
         self.startup_program.global_block().create_parameter(
             dtype=dtype, shape=shape, **attr_copy)
         return self.main_program.global_block().create_parameter(
-            name=attr_copy['name'], dtype=dtype, shape=shape)
+            name=attr_copy['name'],
+            dtype=dtype,
+            shape=shape,
+            trainable=attr_copy.get('trainable', True))
 
     def create_tmp_variable(self, dtype):
         return self.main_program.current_block().create_var(
python/paddle/v2/fluid/layers.py — view file @ 8ba8237a

@@ -112,6 +112,7 @@ def fc(input,
 def embedding(input,
               size,
               is_sparse=False,
+              param_initializer=None,
               param_attr=None,
               data_type='float32',
               main_program=None,

@@ -136,9 +137,16 @@ def embedding(input,
     to the LayerHelper constructor.
     """
 
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
     helper = LayerHelper('embedding', **locals())
     w = helper.create_parameter(
-        attr=helper.param_attr, shape=size, dtype=data_type)
+        attr=helper.param_attr,
+        shape=size,
+        dtype=data_type,
+        initializer=param_initializer or _get_default_param_initializer())
     tmp = helper.create_tmp_variable(data_type)
     helper.append_op(
         type='lookup_table',

@@ -460,6 +468,41 @@ def sums(input, main_program=None, startup_program=None):
     return out
 
 
+def linear_chain_crf(input,
+                     label,
+                     param_attr=None,
+                     param_initializer=None,
+                     main_program=None,
+                     startup_program=None):
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
+    helper = LayerHelper('linear_chain_crf', **locals())
+    size = input.shape[1]
+    transition = helper.create_parameter(
+        attr=helper.param_attr,
+        shape=[size + 2, size],
+        dtype=helper.input_dtype(),
+        initializer=param_initializer or _get_default_param_initializer())
+    alpha = helper.create_tmp_variable(dtype=helper.input_dtype())
+    emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
+    transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
+    log_likelihood = helper.create_tmp_variable(dtype=helper.input_dtype())
+    helper.append_op(
+        type='linear_chain_crf',
+        inputs={"Emission": [input],
+                "Transition": transition,
+                "Label": label},
+        outputs={
+            "Alpha": [alpha],
+            "EmissionExps": [emission_exps],
+            "TransitionExps": transition_exps,
+            "LogLikelihood": log_likelihood
+        })
+
+    return log_likelihood
+
+
 def assign(input, output, main_program=None, startup_program=None):
     helper = LayerHelper('assign', **locals())
     helper.append_op(
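As a minimal usage sketch of the linear_chain_crf layer added above (this is not an additional change in the commit; it condenses the calls that appear in the new tests further down, and the names, shapes, and the label_dict_len of 10 are made up):

```python
import paddle.v2.fluid.layers as layers

# Hypothetical emission input: one score per tag for each token (10 tags here).
emission = layers.data(name='emission', shape=[10], data_type='float32')
label = layers.data(name='label', shape=[1], data_type='int64')

# The layer creates the (size + 2) x size transition parameter internally and
# returns the sequence log-likelihood, which can then be averaged as a cost.
crf_cost = layers.linear_chain_crf(input=emission, label=label)
avg_cost = layers.mean(x=crf_cost)
```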
python/paddle/v2/fluid/optimizer.py — view file @ 8ba8237a

@@ -170,7 +170,8 @@ class Optimizer(object):
         optimize_ops = []
         for param_and_grad in parameters_and_grads:
-            if param_and_grad[1] is not None:
+            if param_and_grad[0].trainable is True and param_and_grad[
+                    1] is not None:
                 optimize_op = self._append_optimize_op(loss.block,
                                                        param_and_grad)
                 optimize_ops.append(optimize_op)
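The optimizer now also skips parameters whose trainable flag is False (the flag that layer_helper.py above starts forwarding from param_attr). A plain-Python sketch of the new filtering condition, using made-up stand-in objects rather than real framework parameters:

```python
from collections import namedtuple

# Stand-in for a framework parameter; only the 'trainable' flag matters here.
Param = namedtuple('Param', ['name', 'trainable'])

parameters_and_grads = [
    (Param('fc.w', True), 'grad_fc_w'),     # trainable with a gradient: optimized
    (Param('emb.w', False), 'grad_emb_w'),  # frozen embedding: skipped
    (Param('crf.w', True), None),           # no gradient: skipped
]

# The condition added in optimizer.py: trainable and a non-None gradient.
to_optimize = [
    p.name for p, g in parameters_and_grads
    if p.trainable is True and g is not None
]
assert to_optimize == ['fc.w']
```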
python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py — new file (0 → 100644), view file @ 8ba8237a

import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor, g_scope
from paddle.v2.fluid.optimizer import SGDOptimizer

word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
mix_hidden_lr = 1e-3

IS_SPARSE = True
PASS_NUM = 10
BATCH_SIZE = 20

embedding_name = 'emb'


def load_parameter(file_name, h, w):
    with open(file_name, 'rb') as f:
        f.read(16)  # skip header.
        return np.fromfile(f, dtype=np.float32).reshape(h, w)


def db_lstm():
    # 8 features
    word = layers.data(name='word_data', shape=[1], data_type='int64')
    predicate = layers.data(name='verb_data', shape=[1], data_type='int64')
    ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], data_type='int64')
    ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], data_type='int64')
    ctx_0 = layers.data(name='ctx_0_data', shape=[1], data_type='int64')
    ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], data_type='int64')
    ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], data_type='int64')
    mark = layers.data(name='mark_data', shape=[1], data_type='int64')

    predicate_embedding = layers.embedding(
        input=predicate,
        size=[pred_len, word_dim],
        data_type='float32',
        is_sparse=IS_SPARSE,
        param_attr={'name': 'vemb'})

    mark_embedding = layers.embedding(
        input=mark,
        size=[mark_dict_len, mark_dim],
        data_type='float32',
        is_sparse=IS_SPARSE)

    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        layers.embedding(
            size=[word_dict_len, word_dim],
            input=x,
            param_attr={'name': embedding_name,
                        'trainable': False}) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0_layers = [
        layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
    ]

    hidden_0 = layers.sums(input=hidden_0_layers)

    lstm_0 = layers.dynamic_lstm(
        input=hidden_0,
        size=hidden_dim,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = layers.sums(input=[
            layers.fc(input=input_tmp[0], size=hidden_dim),
            layers.fc(input=input_tmp[1], size=hidden_dim)
        ])

        lstm = layers.dynamic_lstm(
            input=mix_hidden,
            size=hidden_dim,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid',
            is_reverse=((i % 2) == 1))

        input_tmp = [mix_hidden, lstm]

    feature_out = layers.sums(input=[
        layers.fc(input=input_tmp[0], size=label_dict_len),
        layers.fc(input=input_tmp[1], size=label_dict_len)
    ])

    return feature_out


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = core.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    # define network topology
    feature_out = db_lstm()
    target = layers.data(name='target', shape=[1], data_type='int64')
    crf_cost = layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr={"name": 'crfw',
                    "learning_rate": mix_hidden_lr})
    avg_cost = layers.mean(x=crf_cost)
    # TODO(qiao)
    # 1. add crf_decode_layer and evaluator
    # 2. use other optimizer and check why out will be NAN
    sgd_optimizer = SGDOptimizer(learning_rate=0.0001)
    opts = sgd_optimizer.minimize(avg_cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)
    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(framework.default_startup_program())

    embedding_param = g_scope.find_var(embedding_name).get_tensor()
    embedding_param.set(
        load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
        place)

    batch_id = 0
    for pass_id in xrange(PASS_NUM):
        for data in train_data():
            word_data = to_lodtensor(map(lambda x: x[0], data), place)
            ctx_n2_data = to_lodtensor(map(lambda x: x[1], data), place)
            ctx_n1_data = to_lodtensor(map(lambda x: x[2], data), place)
            ctx_0_data = to_lodtensor(map(lambda x: x[3], data), place)
            ctx_p1_data = to_lodtensor(map(lambda x: x[4], data), place)
            ctx_p2_data = to_lodtensor(map(lambda x: x[5], data), place)
            verb_data = to_lodtensor(map(lambda x: x[6], data), place)
            mark_data = to_lodtensor(map(lambda x: x[7], data), place)
            target = to_lodtensor(map(lambda x: x[8], data), place)

            outs = exe.run(framework.default_main_program(),
                           feed={
                               'word_data': word_data,
                               'ctx_n2_data': ctx_n2_data,
                               'ctx_n1_data': ctx_n1_data,
                               'ctx_0_data': ctx_0_data,
                               'ctx_p1_data': ctx_p1_data,
                               'ctx_p2_data': ctx_p2_data,
                               'verb_data': verb_data,
                               'mark_data': mark_data,
                               'target': target
                           },
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])

            if batch_id % 10 == 0:
                print("avg_cost=" + str(avg_cost_val))

            # exit early for CI
            exit(0)

            batch_id = batch_id + 1


if __name__ == '__main__':
    main()
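The to_lodtensor helper above turns a batch of variable-length sequences into one flat int64 tensor plus level-of-detail (LoD) offsets. A NumPy-only sketch of that offset computation (the LoDTensor and place parts need a Paddle runtime, so they are left out; the three example sequences are made up):

```python
import numpy as np

# Three made-up sequences of token ids, with lengths 3, 1 and 2.
data = [[4, 7, 1], [9], [2, 5]]

seq_lens = [len(seq) for seq in data]
lod = [0]
for l in seq_lens:
    lod.append(lod[-1] + l)           # cumulative sequence offsets

flattened = np.concatenate(data, axis=0).astype("int64")
flattened = flattened.reshape([len(flattened), 1])

assert lod == [0, 3, 4, 6]            # offsets delimit the three sequences
assert flattened.shape == (6, 1)      # one int64 id per row
```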
python/paddle/v2/fluid/tests/test_gru_unit_op.py — view file @ 8ba8237a

@@ -28,8 +28,8 @@ def relu(x):
 class TestGRUUnitOp(OpTest):
-    batch_size = 3
-    frame_size = 5
+    batch_size = 5
+    frame_size = 10
     activate = {
         GRUActivationType.identity: identity,
         GRUActivationType.sigmoid: sigmoid,

@@ -77,7 +77,7 @@ class TestGRUUnitOp(OpTest):
         c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) +
                                                     g[:, frame_size * 2:])
         g = np.hstack((u_r, c))
-        h = u * h_p + (1 - u) * c
+        h = u * c + (1 - u) * h_p
         self.outputs = {
             'Gate': g.astype('float64'),
             'ResetHiddenPrev': r_h_p.astype('float64'),

@@ -92,10 +92,7 @@ class TestGRUUnitOp(OpTest):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(
-            ['Input', 'HiddenPrev', 'Weight'],
-            ['Hidden', 'ResetHiddenPrev', 'Gate'],
-            max_relative_error=0.007)
+        self.check_grad(['Input', 'HiddenPrev', 'Weight'], ['Hidden'])
 
 
 class TestGRUUnitOpWithBias(TestGRUUnitOp):

@@ -104,18 +101,20 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp):
         frame_size = self.frame_size
         super(TestGRUUnitOpWithBias, self).set_inputs()
         self.inputs['Bias'] = np.random.uniform(
-            -0.1, 0.1, (1, frame_size * 3)).astype('float32')
+            -0.1, 0.1, (1, frame_size * 3)).astype('float64')
         self.attrs = {
             'activation': GRUActivationType.identity,
             'gate_activation': GRUActivationType.sigmoid
         }
 
     def test_check_grad(self):
+        self.check_grad(['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden'])
+
+    def test_check_grad_ingore_input(self):
         self.check_grad(
-            ['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden'],
-            max_relative_error=0.007)
+            ['HiddenPrev', 'Weight', 'Bias'], ['Hidden'],
+            no_grad_set=set('Input'))
 
 
 if __name__ == '__main__':
     exit(0)  # FIXME(yuyang18): This unittest is not pass. Fix it later
     unittest.main()
python/paddle/v2/fluid/tests/test_layers.py — view file @ 8ba8237a

+import unittest
+
 import paddle.v2.fluid.layers as layers
 import paddle.v2.fluid.nets as nets
 from paddle.v2.fluid.framework import Program
 import paddle.v2.fluid.core as core
-import unittest
 
 
 class TestBook(unittest.TestCase):

@@ -20,7 +20,8 @@ class TestBook(unittest.TestCase):
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)
         program.append_backward(avg_cost)
-        print str(program)
+
+        # print str(program)
 
     def test_recognize_digits_mlp(self):
         program = Program()

@@ -49,7 +50,7 @@ class TestBook(unittest.TestCase):
             input=predict, label=label, main_program=program)
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)
-        print str(program)
+        # print str(program)
 
     def test_simple_conv2d(self):
         program = Program()

@@ -64,7 +65,7 @@ class TestBook(unittest.TestCase):
             filter_size=[4, 4], main_program=program)
-        print str(program)
+        # print str(program)
 
     def test_recognize_digits_conv(self):
         program = Program()

@@ -103,7 +104,7 @@ class TestBook(unittest.TestCase):
         program.append_backward(avg_cost)
-        print str(program)
+        # print str(program)
 
     def test_word_embedding(self):
         program = Program()

@@ -164,7 +165,24 @@ class TestBook(unittest.TestCase):
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)
 
-        print str(program)
+        # print str(program)
+
+    def test_linear_chain_crf(self):
+        program = Program()
+
+        # Change g_program, so the rest layers use `g_program`
+        images = layers.data(
+            name='pixel',
+            shape=[784],
+            data_type='float32',
+            main_program=program)
+        label = layers.data(
+            name='label', shape=[1], data_type='int32', main_program=program)
+        hidden = layers.fc(input=images, size=128, main_program=program)
+        crf = layers.linear_chain_crf(
+            input=hidden, label=label, main_program=program)
+
+        # print str(program)
 
 
 if __name__ == '__main__':
python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py — view file @ 8ba8237a

@@ -104,7 +104,7 @@ class TestLinearChainCrfOp(OpTest):
         transition_exps = np.exp(transition)
 
         labels = np.random.randint(
-            low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32")
+            low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64")
 
         self.inputs = {
             "Emission": (emission, lod),