未验证 提交 45c8a88a 编写于 作者: Q Qiao Longfei 提交者: GitHub

add crf_decoding layer (#6274)

* add crf_decoding layer

* fix some typos

* fix test_crf_decoding_op
上级 e760641a
......@@ -36,17 +36,18 @@ class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker {
"w. See more details in comments of the linear_chain_crf operator.");
AddInput(
"Label",
"(LoDTensor, LoDTensor<int>). The ground truth with shape "
"(LoDTensor, LoDTensor<int64_t>). The ground truth with shape "
"[N x 1]. This input is optional. See more details in the operator's "
"comments.")
.AsDispensable();
AddOutput("ViterbiPath",
"(LoDTensor, LoDTensor<int>). The decoding results. What to "
"return changes depending on whether the Input(Label) (the groud "
AddOutput(
"ViterbiPath",
"(LoDTensor, LoDTensor<int64_t>). The decoding results. What to "
"return changes depending on whether the Input(Label) (the ground "
"truth) is given. See more details in the operator's comment.");
AddComment(R"DOC(
The crf_decoding operator reads the emission feature weights and the transition
freature weights learned by the linear_chain_crf operator. It implements the
feature weights learned by the linear_chain_crf operator. It implements the
Viterbi algorithm which is a dynamic programming algorithm for finding the most
likely sequence of hidden states, called the Viterbi path, that results in a
sequence of observed tags.
......@@ -60,14 +61,14 @@ operator.
When Input(Label) is given, the crf_decoding operator returns a row vector
with shape [N x 1] whose values are fixed to be 0, indicating an incorrect
prediction, or 1 indicating a tag is correctly predicted. Such an ouput is the
prediction, or 1 indicating a tag is correctly predicted. Such an output is the
input to chunk_eval operator.
2. Input(Label) is not given:
This is the standard decoding process.
The crf_decoding operator returns a row vecotr with shape [N x 1] whose values
The crf_decoding operator returns a row vector with shape [N x 1] whose values
range from 0 to maximum tag number - 1. Each element indicates an index of a
predicted tag.
)DOC");
......
......@@ -43,8 +43,8 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
const size_t level = 0;
const size_t seq_num = lod[level].size() - 1;
int* path = decoded_path->mutable_data<int>(platform::CPUPlace());
math::SetConstant<platform::CPUPlace, int>()(ctx.device_context(),
int64_t* path = decoded_path->mutable_data<int64_t>(platform::CPUPlace());
math::SetConstant<platform::CPUPlace, int64_t>()(ctx.device_context(),
decoded_path, 0);
for (size_t i = 0; i < seq_num; ++i) {
int start_pos = static_cast<int>(lod[level][i]);
......@@ -57,7 +57,7 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
if (label) {
PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL,
"The Input(Label) should be a sequence.");
const int* label_value = label->data<int>();
const int64_t* label_value = label->data<int64_t>();
size_t batch_size = emission_weights->dims()[0];
for (size_t i = 0; i < batch_size; ++i) {
path[i] = label_value[i] == path[i] ? 1 : 0;
......@@ -76,7 +76,7 @@ class CRFDecodingOpKernel : public framework::OpKernel<T> {
const T* x = emission_weights.data<T>();
const T* w = transition_weights.data<T>();
int* path = decoded_path->data<int>();
int64_t* path = decoded_path->data<int64_t>();
// alpha is a memo table. An element alpha(k, v) records the score of the
// best sequence of tags from position 1 to position k with v being the end
......
......@@ -237,7 +237,7 @@ class Operator(object):
def find_name(var_list, name):
    """Report whether `name` is present in `var_list` with a non-None value.

    `var_list` is iterated for its keys and then indexed by key, so it is
    expected to behave like a mapping from names to values. An entry whose
    value is None is treated the same as a missing entry.

    Returns True on the first matching, non-None entry; False otherwise.
    """
    for var_name in var_list:
        # Compare the name first so only the matching entry is looked up.
        if var_name == name and var_list[var_name] is not None:
            return True
    return False
......
import copy
import itertools
from framework import Variable, default_main_program, default_startup_program, \
from framework import Variable, Parameter, default_main_program, default_startup_program, \
unique_name, dtype_is_floating
from paddle.v2.fluid.initializer import Constant, Xavier
from param_attr import ParamAttr
......@@ -122,6 +122,12 @@ class LayerHelper(object):
return self.main_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr.to_kwargs())
def get_parameter(self, name):
    """Fetch an already-created parameter from the main program by name.

    The lookup is done with `var(name)` on the main program's global block.

    Args:
        name: name of the parameter to retrieve.

    Returns:
        The object found under `name`, guaranteed to be a `Parameter`.

    Raises:
        ValueError: if the object found under `name` is not a `Parameter`.
    """
    candidate = self.main_program.global_block().var(name)
    if isinstance(candidate, Parameter):
        return candidate
    raise ValueError("no Parameter name %s found" % name)
def create_tmp_variable(self, dtype):
return self.main_program.current_block().create_var(
name=unique_name(".".join([self.name, 'tmp'])),
......
......@@ -477,6 +477,24 @@ def linear_chain_crf(input,
return log_likelihood
def crf_decoding(input,
                 param_attr,
                 label=None,
                 main_program=None,
                 startup_program=None):
    """Append a `crf_decoding` operator and return its ViterbiPath output.

    The transition weights are not created here: they are looked up by
    `param_attr.name` through `LayerHelper.get_parameter`, so a parameter
    with that name must already exist.

    Args:
        input: variable fed to the operator as "Emission".
        param_attr: attribute object whose `name` identifies the existing
            transition parameter.
        label: optional variable fed as "Label"; passed through unchanged,
            including when it is None.
        main_program: forwarded to `LayerHelper` via `**locals()`.
        startup_program: forwarded to `LayerHelper` via `**locals()`.

    Returns:
        The temporary variable bound to the operator's "ViterbiPath" output.
    """
    # Keep this as the first statement: `locals()` must contain only the
    # function arguments when it is forwarded to LayerHelper.
    helper = LayerHelper('crf_decoding', **locals())
    transition_param = helper.get_parameter(param_attr.name)
    # NOTE(review): the operator documents ViterbiPath as int64, yet this
    # variable is declared with the input's dtype -- confirm whether it
    # should be created as int64 instead.
    decoded = helper.create_tmp_variable(dtype=helper.input_dtype())
    op_inputs = {
        "Emission": [input],
        "Transition": transition_param,
        "Label": label
    }
    op_outputs = {"ViterbiPath": [decoded]}
    helper.append_op(
        type='crf_decoding', inputs=op_inputs, outputs=op_outputs)
    return decoded
def assign(input, output, main_program=None, startup_program=None):
helper = LayerHelper('assign', **locals())
helper.append_op(
......
......@@ -137,12 +137,19 @@ def main():
param_attr=fluid.ParamAttr(
name='crfw', learning_rate=mix_hidden_lr))
avg_cost = fluid.layers.mean(x=crf_cost)
# TODO(qiao)
# 1. add crf_decode_layer and evaluator
# 2. use other optimizer and check why out will be NAN
# try other optimizers and find out why the output becomes NaN
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
sgd_optimizer.minimize(avg_cost)
# TODO(qiao)
# add dependency tracking and move this config before the optimizer
crf_decode = fluid.layers.crf_decoding(
input=feature_out,
label=target,
param_attr=fluid.ParamAttr(name='crfw'))
train_data = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.conll05.test(), buf_size=8192),
......@@ -168,7 +175,6 @@ def main():
feed=feeder.feed(data),
fetch_list=[avg_cost])
avg_cost_val = np.array(outs[0])
if batch_id % 10 == 0:
print("avg_cost=" + str(avg_cost_val))
......
......@@ -20,14 +20,14 @@ class CRFDecoding(object):
self.w = transition_weights[2:, :]
self.track = np.zeros(
(seq_start_positions[-1], self.tag_num), dtype="int32")
(seq_start_positions[-1], self.tag_num), dtype="int64")
self.decoded_path = np.zeros(
(seq_start_positions[-1], 1), dtype="int32")
(seq_start_positions[-1], 1), dtype="int64")
def _decode_one_sequence(self, decoded_path, x):
seq_len, tag_num = x.shape
alpha = np.zeros((seq_len, tag_num), dtype="float64")
track = np.zeros((seq_len, tag_num), dtype="int32")
track = np.zeros((seq_len, tag_num), dtype="int64")
for i in range(tag_num):
alpha[0, i] = self.a[i] + x[0, i]
......@@ -125,10 +125,10 @@ class TestCRFDecodingOp2(OpTest):
axis=0)
labels = np.random.randint(
low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32")
low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64")
predicted_labels = np.ones(
(lod[-1][-1], 1), dtype="int32") * (TAG_NUM - 1)
expected_output = (labels == predicted_labels).astype("int32")
(lod[-1][-1], 1), dtype="int64") * (TAG_NUM - 1)
expected_output = (labels == predicted_labels).astype("int64")
self.inputs = {
"Emission": (emission, lod),
......
......@@ -4,6 +4,7 @@ import unittest
import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets
from paddle.v2.fluid.framework import Program, program_guard
from paddle.v2.fluid.param_attr import ParamAttr
class TestBook(unittest.TestCase):
......@@ -132,8 +133,12 @@ class TestBook(unittest.TestCase):
images = layers.data(name='pixel', shape=[784], dtype='float32')
label = layers.data(name='label', shape=[1], dtype='int32')
hidden = layers.fc(input=images, size=128)
crf = layers.linear_chain_crf(input=hidden, label=label)
crf = layers.linear_chain_crf(
input=hidden, label=label, param_attr=ParamAttr(name="crfw"))
crf_decode = layers.crf_decoding(
input=hidden, param_attr=ParamAttr(name="crfw"))
self.assertNotEqual(crf, None)
self.assertNotEqual(crf_decode, None)
print(str(program))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册