机器未来 / Paddle (forked from PaddlePaddle / Paddle)

Commit 427644b2
Authored on Oct 23, 2017 by caoying03

fix the computation kernels.

Parent: c8d0d37c

Showing 4 changed files with 84 additions and 57 deletions (+84 -57):

    paddle/framework/operator.h                                     +1  -1
    paddle/operators/linear_chain_crf_op.cc                         +75 -47
    paddle/operators/linear_chain_crf_op.h                          +1  -1
    python/paddle/v2/framework/tests/test_linear_chain_crf_op.py    +7  -8

paddle/framework/operator.h

@@ -659,7 +659,7 @@ class OperatorWithKernel : public OperatorBase {
       if (t != nullptr) {
         int tmp = static_cast<int>(ToDataType(t->type()));
         PADDLE_ENFORCE(tmp == data_type || data_type == -1,
-                       "DataType of Paddle Op must be same.");
+                       "DataType of Paddle Op must be the same.");
         data_type = tmp;
       }
     }

paddle/operators/linear_chain_crf_op.cc

@@ -165,11 +165,11 @@ class LinearChainCrfOp : public framework::OperatorWithKernel {
                    "Output(LogLikelihood) should be not null.");

     auto emission_dims = ctx->GetInputDim("Emission");
-    auto transition_dims = ctx->GetInputDim("Transition");
-    auto label_dims = ctx->GetInputDim("Label");
     PADDLE_ENFORCE_EQ(emission_dims.size(), 2UL,
                       "The Input(Emission) should be a 2-D tensor.");
     PADDLE_ENFORCE(emission_dims[0], "An empty mini-batch is not allowed.");
+
+    auto transition_dims = ctx->GetInputDim("Transition");
     PADDLE_ENFORCE_EQ(transition_dims.size(), 2UL,
                       "The Input(Transition) should be a 2-D tensor.");
     PADDLE_ENFORCE_EQ(

@@ -180,6 +180,8 @@ class LinearChainCrfOp : public framework::OperatorWithKernel {
         emission_dims[1], transition_dims[1],
         "The 2nd dimension of the Input(Emission) and the Input(Transition) "
         "should be equal to the tag number.");
+
+    auto label_dims = ctx->GetInputDim("Label");
     PADDLE_ENFORCE(label_dims.size() == 2UL && label_dims[1] == 1UL,
                    "The Input(Label) should be a 2-D tensor with the 2nd "
                    "dimensions fixed to 1.");

@@ -204,7 +206,7 @@ class LinearChainCrfOp : public framework::OperatorWithKernel {
   // operator is determined by its input "Emission".
   framework::DataType IndicateDataType(
       const framework::ExecutionContext& ctx) const override {
-    return framework::ToDataType(ctx.Input<Tensor>("Emission")->type());
+    return framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type());
   }
 };

@@ -224,6 +226,8 @@ class LinearChainCrfOpKernel<platform::CPUPlace, T>
     auto* label = ctx.Input<LoDTensor>("Label");

     auto in_lod = emission_weights->lod();
+    PADDLE_ENFORCE(in_lod.size(), "Input(Emission) is not a sequence.");
+
     // TODO(caoying) The checks related to LoD information should be
     // moved into InferShape once after the InferShape is refactored.
     PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL,

@@ -266,12 +270,17 @@ class LinearChainCrfOpKernel<platform::CPUPlace, T>
     for (size_t i = 0; i < seq_num; ++i) {
       int start_pos = static_cast<int>(in_lod[level][i]);
       int end_pos = static_cast<int>(in_lod[level][i + 1]);
+      if (end_pos == start_pos) {
+        // If an empty input sequence is given, pad 0 for its cost.
+        log_likelihood[i] = static_cast<T>(0.);
+        continue;
+      }

-      const Tensor one_seq = emission_weights->Slice<T>(start_pos, end_pos);
-      Tensor one_seq_row_max = emission_row_max.Slice<T>(start_pos, end_pos);
-      Tensor one_seq_exps = emission_exps->Slice<T>(start_pos, end_pos);
-      const Tensor one_seq_label = label->Slice<T>(start_pos, end_pos);
-      Tensor one_seq_alpha = alpha->Slice<T>(start_pos, end_pos);
+      const Tensor one_seq = emission_weights->Slice(start_pos, end_pos);
+      Tensor one_seq_row_max = emission_row_max.Slice(start_pos, end_pos);
+      Tensor one_seq_exps = emission_exps->Slice(start_pos, end_pos);
+      const Tensor one_seq_label = label->Slice(start_pos, end_pos);
+      Tensor one_seq_alpha = alpha->Slice(start_pos, end_pos);

       log_likelihood[i] = ForwardOneSequence(
           &one_seq, &one_seq_row_max, &one_seq_exps, transition_weights,

@@ -306,7 +315,7 @@ class LinearChainCrfOpKernel<platform::CPUPlace, T>

     for (size_t k = 1; k < seq_length; ++k) {
       for (size_t i = 0; i < tag_num; ++i) {
-        T sum = 0.;
+        T sum = static_cast<T>(0.);
         for (size_t j = 0; j < tag_num; ++j) {
           sum += alpha_value[(k - 1) * tag_num + j] *
                  w_exps[(j + state_trans_base_idx) * tag_num + i];

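The nested loops in this hunk are the forward (alpha) recursion of the linear-chain CRF, written over exponentiated weights. Below is a minimal NumPy sketch of the same recursion; the layout assumptions (row 0 of w_exps holds the start-transition weights, rows from state_trans_base_idx = 2 onward hold the tag-to-tag transitions, and each alpha row is L1-normalized as NormalizeL1 does in the kernel) are inferred from the surrounding code, not quoted from it.

    import numpy as np

    def forward_alpha(x_exps, w_exps, state_trans_base_idx=2):
        """Sketch of the alpha recursion; x_exps is [seq_len, tag_num] of
        exp(emission), w_exps is [tag_num + 2, tag_num] of exp(transition)."""
        seq_len, tag_num = x_exps.shape
        alpha = np.zeros((seq_len, tag_num))
        alpha[0] = w_exps[0] * x_exps[0]   # start transition * first emission
        alpha[0] /= alpha[0].sum()         # L1 normalization against overflow
        for k in range(1, seq_len):
            for i in range(tag_num):
                # mirrors: sum += alpha_value[(k-1)*tag_num + j] *
                #                 w_exps[(j + state_trans_base_idx)*tag_num + i]
                s = sum(alpha[k - 1, j] * w_exps[j + state_trans_base_idx, i]
                        for j in range(tag_num))
                alpha[k, i] = x_exps[k, i] * s
            alpha[k] /= alpha[k].sum()
        return alpha
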
@@ -326,11 +335,14 @@ class LinearChainCrfOpKernel<platform::CPUPlace, T>
     PADDLE_ENFORCE_LT(
         *std::max_element(lbl, lbl + seq_length), tag_num,
         "An invalid tag label that execesses the largest tag number.");
     // Calculate the nominator part, which depends on the label sequence.
     ll += w[lbl[0]] /*start transition*/ + x[lbl[0]] +
           w[tag_num + lbl[seq_length - 1]] /*end transition*/;
-    for (size_t k = 1; k < seq_length; ++k)
-      ll += x[k * tag_num + lbl[k]] + w[lbl[k - 1] * tag_num + lbl[k]];
+    for (size_t k = 1; k < seq_length; ++k) {
+      ll += x[k * tag_num + lbl[k]] +
+            w[(lbl[k - 1] + state_trans_base_idx) * tag_num + lbl[k]];
+    }
     return -ll;
   }
 };

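The second change in this hunk fixes the transition lookup for consecutive gold labels: the source tag is offset by state_trans_base_idx so the index lands in the tag-to-tag block of w rather than in the two rows reserved for the start and end transitions. A hedged NumPy sketch of this "nominator" (gold-path) score, using 2-D indexing for readability:

    import numpy as np

    def gold_path_score(x, w, lbl, state_trans_base_idx=2):
        """x: [seq_len, tag_num] emissions; w: [tag_num + 2, tag_num] transitions,
        with row 0 = start and row 1 = end transitions; lbl: gold tag per step."""
        x, w = np.asarray(x), np.asarray(w)
        seq_len = len(lbl)
        ll = w[0, lbl[0]] + x[0, lbl[0]] + w[1, lbl[seq_len - 1]]
        for k in range(1, seq_len):
            # offset by state_trans_base_idx, as the fixed kernel line does
            ll += x[k, lbl[k]] + w[lbl[k - 1] + state_trans_base_idx, lbl[k]]
        return ll
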
@@ -353,12 +365,13 @@ class LinearChainCrfGradOp : public framework::OperatorWithKernel {
                    "Output(Transition@GRAD) should be not null.");

     auto emission_exps_dims = ctx->GetInputDim("EmissionExps");
-    auto transition_exps_dims =
-        ctx->GetInputDim(framework::GradVarName("TransitionExps"));
-    auto label_dims = ctx->GetInputDim("Label");
     PADDLE_ENFORCE_EQ(emission_exps_dims.size(), 2UL,
                       "The Input(EmissionExps) should be a 2-D tensor.");
     PADDLE_ENFORCE(emission_exps_dims[0],
                    "An empty mini-batch is not allowed.");
+
+    auto transition_exps_dims =
+        ctx->GetInputDim(framework::GradVarName("TransitionExps"));
     PADDLE_ENFORCE_EQ(transition_exps_dims.size(), 2UL,
                       "The Input(TransitionExps) should be a 2-D tensor.");
     PADDLE_ENFORCE_EQ(

@@ -369,6 +382,8 @@ class LinearChainCrfGradOp : public framework::OperatorWithKernel {
         emission_exps_dims[1], transition_exps_dims[1],
         "The 2nd dimension of the Input(EmissionExps) and the "
         "Input(TransitionExps) should be equal to the tag number.");
+
+    auto label_dims = ctx->GetInputDim("Label");
     PADDLE_ENFORCE(label_dims.size() == 2UL && label_dims[1] == 1UL,
                    "The Input(Label) should be a 2-D tensor with the 2nd "
                    "dimensions fixed to 1.");

@@ -381,6 +396,14 @@ class LinearChainCrfGradOp : public framework::OperatorWithKernel {
     ctx->SetOutputDim(framework::GradVarName("Transition"),
                       transition_exps_dims);
   }
+
+ protected:
+  // Explicitly set that the data type of output of the linear_chain_crf_grad
+  // operator is determined by its input "EmissionExps".
+  framework::DataType IndicateDataType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::ToDataType(ctx.Input<LoDTensor>("EmissionExps")->type());
+  }
 };

 template <typename T>

@@ -390,12 +413,12 @@ class LinearChainCrfGradOpKernel<platform::CPUPlace, T>
   void Compute(const framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
                    "This kernel only runs on CPU.");
-    auto* ll_grad =
-        ctx.Input<LoDTensor>(framework::GradVarName("LogLikelihood"));
     auto* label = ctx.Input<LoDTensor>("Label");
     auto* emission_exps = ctx.Input<LoDTensor>("EmissionExps");
     auto* transition_exps = ctx.Input<Tensor>("TransitionExps");
-    auto* alpha = ctx.Input<Tensor>("Alpha");
+    auto* alpha = ctx.Input<LoDTensor>("Alpha");
+    const T* ll_grad =
+        ctx.Input<Tensor>(framework::GradVarName("LogLikelihood"))->data<T>();

     auto* emission_grad =
         ctx.Output<Tensor>(framework::GradVarName("Emission"));

@@ -413,34 +436,31 @@ class LinearChainCrfGradOpKernel<platform::CPUPlace, T>
     Tensor beta;
     beta.mutable_data<T>(emission_dims, platform::CPUPlace());

-    auto place = ctx.GetEigenDevice<platform::CPUPlace>();
-    auto x_grad = EigenMatrix<T>::From(*emission_grad);
-    auto out_grad = EigenMatrix<T>::From(*ll_grad);
-    x_grad.device(place) =
-        x_grad * out_grad.broadcast(Eigen::DSizes<int, 2>(1, emission_dims[1]));
-
     const size_t level = 0;  // currently, only support sequence.
-    auto lod = emission_exps->lod();
+    auto lod = label->lod();
+    PADDLE_ENFORCE(lod.size(), "Input(Label) is not a sequence.");
+
     for (size_t i = 0; i < lod[level].size() - 1; ++i) {
       int start_pos = static_cast<int>(lod[level][i]);
       int end_pos = static_cast<int>(lod[level][i + 1]);
+      if (end_pos == start_pos) continue;

       const Tensor one_seq_emission_exps =
-          emission_exps->Slice<T>(start_pos, end_pos);
-      const Tensor one_seq_label = label->Slice<T>(start_pos, end_pos);
-      const Tensor one_seq_alpha = alpha->Slice<T>(start_pos, end_pos);
-      Tensor one_seq_beta = beta.Slice<T>(start_pos, end_pos);
-      Tensor one_seq_emission_grad = emission_grad->Slice<T>(start_pos, end_pos);
-
-      BackwardOneSequence(ctx.device_context(), &one_seq_emission_exps,
-                          transition_exps, &one_seq_alpha, &one_seq_label,
-                          &one_seq_beta, trans_grad, &one_seq_emission_grad);
+          emission_exps->Slice(start_pos, end_pos);
+      const Tensor one_seq_label = label->Slice(start_pos, end_pos);
+      const Tensor one_seq_alpha = alpha->Slice(start_pos, end_pos);
+      Tensor one_seq_beta = beta.Slice(start_pos, end_pos);
+      Tensor one_seq_emission_grad = emission_grad->Slice(start_pos, end_pos);
+
+      BackwardOneSequence(ctx.device_context(), ll_grad[i],
+                          &one_seq_emission_exps, transition_exps,
+                          &one_seq_alpha, &one_seq_label, &one_seq_beta,
+                          trans_grad, &one_seq_emission_grad);
     }
   }

  protected:
-  void BackwardOneSequence(const platform::DeviceContext& ctx,
+  void BackwardOneSequence(const platform::DeviceContext& ctx, const T ll_grad,
                            const Tensor* emission_exps,
                            const Tensor* transition_exps, const Tensor* alpha,
                            const Tensor* label, Tensor* beta,

@@ -457,12 +477,15 @@ class LinearChainCrfGradOpKernel<platform::CPUPlace, T>
     const size_t state_trans_base_idx = 2;

     // Calculate the backwark vectors beta.
-    for (int i = 0; i < tag_num; ++i)
+    // First, calculate the initialition state.
+    for (int i = 0; i < tag_num; ++i) {
       beta_value[(seq_length - 1) * tag_num + i] = w_exps[tag_num + i];
+    }
     NormalizeL1<T>(beta_value + (seq_length - 1) * tag_num, tag_num);

     for (int k = seq_length - 2; k >= 0; --k) {
       for (int i = 0; i < tag_num; ++i) {
-        T sum = 0.;
+        T sum = static_cast<T>(0.);
         for (int j = 0; j < tag_num; ++j) {
           sum += w_exps[(i + state_trans_base_idx) * tag_num + j] *
                  x_exps[(k + 1) * tag_num + j] *

@@ -476,6 +499,7 @@ class LinearChainCrfGradOpKernel<platform::CPUPlace, T>
     auto alpha_mat = EigenMatrix<T>::From(*alpha);
     auto beta_mat = EigenMatrix<T>::From(*beta);

     auto x_grad_mat = EigenMatrix<T>::From(*emission_grad);
+    x_grad_mat.setConstant(ll_grad);
     auto* place = ctx.GetEigenDevice<platform::CPUPlace>();
     x_grad_mat.device(*place) = alpha_mat * beta_mat;

@@ -483,8 +507,9 @@ class LinearChainCrfGradOpKernel<platform::CPUPlace, T>
             .reshape(Eigen::DSizes<int, 2>(seq_length, 1))
             .broadcast(Eigen::DSizes<int, 2>(1, tag_num));

-    for (int k = 0; k < seq_length; ++k)
+    for (int k = 0; k < seq_length; ++k) {
       x_grad_mat(k, label_value[k]) -= static_cast<T>(1);
+    }

     if (transition_grad) {
       T* trans_grad = transition_grad->data<T>();

@@ -501,20 +526,23 @@ class LinearChainCrfGradOpKernel<platform::CPUPlace, T>
             .broadcast(Eigen::DSizes<int, 2>(1, tag_num));

       for (int k = 1; k < seq_length; ++k) {
-        T sum = 0.;
+        T sum = static_cast<T>(0.);
         for (int i = 0; i < tag_num; ++i) {
-          for (int j = 0; j < tag_num; ++j)
-            sum += x_exps_mat(i, j) * alpha_mat(k - 1, i) * beta_mat(k, j);
+          for (int j = 0; j < tag_num; ++j) {
+            sum += w_exps[(i + state_trans_base_idx) * tag_num + j] *
+                   alpha_mat(k - 1, i) * beta_mat(k, j);
+          }
         }
-        sum = static_cast<T>(1) / sum;
+        sum = static_cast<T>(1.) / sum;
         for (int i = 0; i < tag_num; ++i) {
           for (int j = 0; j < tag_num; ++j) {
-            trans_grad[(i + 2) * tag_num + j] +=
-                sum * x_exps_mat(i, j) * alpha_mat(k - 1, i) * beta_mat(k, j);
+            trans_grad[(i + state_trans_base_idx) * tag_num + j] +=
+                sum * w_exps[(i + state_trans_base_idx) * tag_num + j] *
+                alpha_mat(k - 1, i) * beta_mat(k, j);
           }
         }
         trans_grad[label_value[k - 1] * tag_num + label_value[k]] -=
-            static_cast<T>(1);
+            static_cast<T>(1.);
       }
     }
   }

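Taken together, the gradient hunks implement the usual CRF gradient: the model's expected transition counts, built from the alpha and beta vectors and the exponentiated transition weights and normalized by the per-step sum (sum = 1. / sum above), are added to Transition@GRAD, and one is subtracted at the gold transition. A small NumPy sketch of the expectation part of one time step, under the same layout assumptions as the earlier sketches:

    import numpy as np

    def transition_expectation_step(trans_grad, alpha_prev, beta_k, w_exps,
                                    state_trans_base_idx=2):
        """alpha_prev, beta_k: [tag_num] forward/backward vectors at steps k-1, k;
        w_exps: [tag_num + 2, tag_num] exponentiated transition weights."""
        # unnormalized joint weight of (tag i at step k-1, tag j at step k),
        # mirroring w_exps[(i + base)*tag_num + j] * alpha_mat(k-1, i) * beta_mat(k, j)
        joint = (w_exps[state_trans_base_idx:, :] *
                 alpha_prev[:, None] * beta_k[None, :])
        # normalize so the per-step expectations sum to one (sum = 1. / sum)
        trans_grad[state_trans_base_idx:, :] += joint / joint.sum()
        return trans_grad
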
paddle/operators/linear_chain_crf_op.h

@@ -42,7 +42,7 @@ class LinearChainCrfGradOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override;

  protected:
-  void BackwardOneSequence(const platform::DeviceContext& ctx,
+  void BackwardOneSequence(const platform::DeviceContext& ctx, const T ll_grad,
                            const Tensor* emission_exps,
                            const Tensor* transition_exps, const Tensor* alpha,
                            const Tensor* label, Tensor* beta,

python/paddle/v2/framework/tests/test_linear_chain_crf_op.py

@@ -4,8 +4,6 @@ import numpy as np
 from op_test import OpTest

-import pdb
-

 class LinearChainCrfForward(object):
     def __init__(self, seq_start_positions, emission_weights, emission_row_max,

@@ -65,10 +63,10 @@ class LinearChainCrfForward(object):
         # calculate the nominator part.
         log_likelihood += (
-            self.a[label[0]] + self.x[0, label[0]] + self.b[label[-1]])
+            self.a[label[0]] + x[0, label[0]] + self.b[label[-1]])

         for k in range(1, seq_len):
-            log_likelihood += (
-                self.x[k, label[k]] + self.w[label[k - 1], label[k]])
+            log_likelihood += (x[k, label[k]] + self.w[label[k - 1], label[k]])
         return -log_likelihood

     def crf_forward_compute(self):

@@ -77,7 +75,7 @@ class LinearChainCrfForward(object):
             end = self.seq_start_positions[i + 1]

             self.log_likelihood[i] = self._forward_a_sequence(
-                self.x[start:end], self.x_row_max[start:end, :],
+                self.x[start:end, :], self.x_row_max[start:end, :],
                 self.x_exps[start:end, :], self.labels[start:end, :],
                 self.alpha[start:end, :])
         return self.alpha, self.log_likelihood

@@ -85,10 +83,11 @@ class LinearChainCrfForward(object):
 class TestLinearChainCrfOp(OpTest):
     def set_test_data(self):
-        SEQ_NUM = 3
+        SEQ_NUM = 2
         TAG_NUM = 17
-        MAX_SEQ_LEN = 13
+        MAX_SEQ_LEN = 5

+        random.seed(1)
         # the linear_chain_crf operator only supports sequence (LoD level = 1)
         lod = [[0]]
         for i in range(SEQ_NUM):

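The shrunken test data keeps the same structure: a single-level LoD of cumulative offsets, where each consecutive pair of offsets delimits one sequence inside the batched emission matrix, matching the in_lod[level][i] / in_lod[level][i + 1] slicing in the C++ kernels. The loop body is elided above, so the following is a plausible sketch (an assumption, not the test's actual code) of how such offsets are built and consumed:

    import random

    SEQ_NUM, TAG_NUM, MAX_SEQ_LEN = 2, 17, 5
    random.seed(1)

    lod = [[0]]
    for i in range(SEQ_NUM):
        # assumed loop body: append the cumulative end offset of sequence i
        lod[-1].append(lod[-1][-1] + random.randint(1, MAX_SEQ_LEN))

    # consecutive offsets -> (start, end) spans, one per sequence
    seq_spans = list(zip(lod[-1][:-1], lod[-1][1:]))
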