diff --git a/paddle/operators/gru_unit_op.cc b/paddle/operators/gru_unit_op.cc index 89c027ff1eea93012dc5ab22b081786efc328e96..877c969103cfc17e1b170449d1922d9c7db2a58b 100644 --- a/paddle/operators/gru_unit_op.cc +++ b/paddle/operators/gru_unit_op.cc @@ -114,18 +114,19 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault(sigmoid) .InEnum({identity, sigmoid, tanh, relu}); AddComment(R"DOC( -GRUUnit Operator. - -This operator implements partial calculations of the GRU unit as follows: +GRUUnit Operator implements partial calculations of the GRU unit as following: $$ -update \ gate: u_t = actGate(xu_t + W_u * hidden_{prev} + bias_u) \\ -reset \ gate: r_t = actGate(xr_t + W_r * hidden_{prev} + bias_r) \\ -output \ candidate: {h}_t = actNode({xc}_t + W_c * dot(r_t, hidden_{prev}) + bias_c) \\ -output: h_t = dot((1-u_t), {h}_t) + dot(u_t, hidden_{prev}) +update \ gate: u_t = actGate(xu_t + W_u * h_{t-1} + b_u) \\ +reset \ gate: r_t = actGate(xr_t + W_r * h_{t-1} + b_r) \\ +output \ candidate: {h}_t = actNode(xc_t + W_c * dot(r_t, h_{t-1}) + b_c) \\ +output: h_t = dot((1 - u_t), h_{t-1}) + dot(u_t, {h}_t) $$ -The rest of GRU unit can be completed by using FCOp's output as the input of GRUUnitOp. +which is same as one time step of GRU Operator. + +@note To implement the complete GRU unit, fully-connected operator must be +used before to feed xu, xr and xc as the Input of GRUUnit operator. )DOC"); } @@ -150,12 +151,6 @@ class GRUUnitGradOp : public framework::OperatorWithKernel { "ResetHiddenPrev"); PADDLE_ENFORCE(ctx->HasInput("Hidden"), "Input(%s) of GRUUnitGradOp should not be null.", "Hidden"); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Gate")), - "Input(%s@GRAD) of GRUUnitGradOp should not be null.", - "Gate"); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("ResetHiddenPrev")), - "Input(%s@GRAD) of GRUUnitGradOp should not be null.", - "ResetHiddenPrev"); PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")), "Input(%s@GRAD) of GRUUnitGradOp should not be null.", "Hidden"); diff --git a/paddle/operators/gru_unit_op.h b/paddle/operators/gru_unit_op.h index c53e7d9827e0395e6ce613302e732b2797f83cdd..050430d3252d05236219cd5ced5a792c21413c1f 100644 --- a/paddle/operators/gru_unit_op.h +++ b/paddle/operators/gru_unit_op.h @@ -110,7 +110,7 @@ class GRUUnitKernel : public framework::OpKernel { auto c = g.slice(c_offsets, extents); // output candidate // calculate final output - h.device(place) = u * (h_p - c) + c; + h.device(place) = u * (c - h_p) + h_p; } }; @@ -146,35 +146,27 @@ class GRUUnitGradKernel : public framework::OpKernel { auto* weight_grad = context.Output(framework::GradVarName("Weight")); auto* bias_grad = context.Output(framework::GradVarName("Bias")); - input_grad->mutable_data(context.GetPlace()); - hidden_prev_grad->mutable_data(context.GetPlace()); - weight_grad->mutable_data(context.GetPlace()); Tensor gate_grad; - gate_grad.mutable_data(input->dims(), context.GetPlace()); Tensor reset_hidden_prev_grad; - reset_hidden_prev_grad.mutable_data(reset_hidden_prev->dims(), - context.GetPlace()); - - int batch_size = input->dims()[0]; - int frame_size = hidden_prev->dims()[1]; const T* hidden_prev_data = hidden_prev->data(); - T* hidden_prev_grad_data = hidden_prev_grad->data(); const T* weight_data = weight->data(); - T* weight_grad_data = weight_grad->data(); - T* gate_grad_data = gate_grad.data(); + T* gate_grad_data = + gate_grad.mutable_data(input->dims(), context.GetPlace()); const T* reset_hidden_prev_data = reset_hidden_prev->data(); - T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.data(); + T* reset_hidden_prev_grad_data = reset_hidden_prev_grad.mutable_data( + reset_hidden_prev->dims(), context.GetPlace()); auto h_p = EigenMatrix::From(*hidden_prev); auto g = EigenMatrix::From(*gate); auto d_h = EigenMatrix::From(*hidden_grad); - auto d_x = EigenMatrix::From(*input_grad); - auto d_h_p = EigenMatrix::From(*hidden_prev_grad); auto d_g = EigenMatrix::From(gate_grad); auto d_r_h_p = EigenMatrix::From(reset_hidden_prev_grad); auto place = context.GetEigenDevice(); + int batch_size = input->dims()[0]; + int frame_size = hidden_prev->dims()[1]; + Eigen::array extents({{batch_size, frame_size}}); Eigen::array u_offsets({{0, 0}}); auto u = g.slice(u_offsets, extents); // update gate @@ -185,38 +177,52 @@ class GRUUnitGradKernel : public framework::OpKernel { // backward for unactivated update gate ActGradCompute(context.Attr("gate_activation"), place, u, u, - d_g.slice(u_offsets, extents), d_h * (h_p - c)); + d_g.slice(u_offsets, extents), d_h * (c - h_p)); // backward for unactivated output candidate ActGradCompute(context.Attr("activation"), place, c, c, - d_g.slice(c_offsets, extents), d_h * (u.constant(T(1)) - u)); + d_g.slice(c_offsets, extents), d_h * u); // backward for reset_hidden_prev math::gemm(context.device_context(), false, true, batch_size, frame_size, frame_size, 1, gate_grad_data + frame_size * 2, frame_size * 3, weight_data + frame_size * frame_size * 2, frame_size, 0, reset_hidden_prev_grad_data, frame_size); - // backward for state_weight - math::gemm( - context.device_context(), true, false, frame_size, frame_size, - batch_size, 1, reset_hidden_prev_data, frame_size, - gate_grad_data + frame_size * 2, frame_size * 3, 0, - weight_grad_data + frame_size * frame_size * 2, frame_size); // backward for unactivated reset gate ActGradCompute(context.Attr("gate_activation"), place, r, r, d_g.slice(r_offsets, extents), d_r_h_p * h_p); - // backward for update_gate_weight and reset_gate_weight - math::gemm(context.device_context(), true, false, frame_size, - frame_size * 2, batch_size, 1, hidden_prev_data, - frame_size, gate_grad_data, frame_size * 3, 0, - weight_grad_data, frame_size * 2); + // backward for weight + if (weight_grad) { + T* weight_grad_data = weight_grad->mutable_data(context.GetPlace()); + // backward for state_weight + math::gemm( + context.device_context(), true, false, frame_size, frame_size, + batch_size, 1, reset_hidden_prev_data, frame_size, + gate_grad_data + frame_size * 2, frame_size * 3, 0, + weight_grad_data + frame_size * frame_size * 2, frame_size); + + // backward for update_gate_weight and reset_gate_weight + math::gemm(context.device_context(), true, false, frame_size, + frame_size * 2, batch_size, 1, hidden_prev_data, + frame_size, gate_grad_data, frame_size * 3, 0, + weight_grad_data, frame_size * 2); + } // backward for hidden_prev - d_h_p.device(place) = d_r_h_p * r + d_h * u; - math::gemm(context.device_context(), false, true, batch_size, - frame_size, frame_size * 2, 1, gate_grad_data, - frame_size * 3, weight_data, frame_size * 2, 1, - hidden_prev_grad_data, frame_size); + if (hidden_prev_grad) { + T* hidden_prev_grad_data = + hidden_prev_grad->mutable_data(context.GetPlace()); + auto d_h_p = EigenMatrix::From(*hidden_prev_grad); + d_h_p.device(place) = d_r_h_p * r + d_h * (u.constant(T(1)) - u); + math::gemm(context.device_context(), false, true, batch_size, + frame_size, frame_size * 2, 1, gate_grad_data, + frame_size * 3, weight_data, frame_size * 2, 1, + hidden_prev_grad_data, frame_size); + } // backward for input - d_x.device(place) = d_g; + if (input_grad) { + input_grad->mutable_data(context.GetPlace()); + auto d_x = EigenMatrix::From(*input_grad); + d_x.device(place) = d_g; + } // backward for bias if (bias_grad) { bias_grad->mutable_data(context.GetPlace()); diff --git a/paddle/operators/linear_chain_crf_op.h b/paddle/operators/linear_chain_crf_op.h index ddf73981751798c72cef08f2dd5c87580b45aec3..872f659fed40d7479d9d8bed6c8469fb28282253 100644 --- a/paddle/operators/linear_chain_crf_op.h +++ b/paddle/operators/linear_chain_crf_op.h @@ -271,7 +271,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { ll -= std::log(sum); // Now ll is equal to -log(Z). - const int* lbl = label.data(); + const int64_t* lbl = label.data(); PADDLE_ENFORCE_LT( static_cast(*std::max_element(lbl, lbl + seq_length)), tag_num, "An invalid tag label that execesses the largest tag number."); @@ -449,7 +449,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { Tensor* emission_grad) const { const T* w_exps = transition_exps.data(); const T* x_exps = emission_exps.data(); - const int* label_value = label.data(); + const int64_t* label_value = label.data(); T* beta_value = beta->data(); auto x_dims = emission_exps.dims(); diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py index 5697eaa460cf21bf73add1b460947e4f3d4edfc3..e40551ca73e991edd8e1d1df5b103c36367b7050 100644 --- a/python/paddle/v2/fluid/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -126,7 +126,10 @@ class LayerHelper(object): self.startup_program.global_block().create_parameter( dtype=dtype, shape=shape, **attr_copy) return self.main_program.global_block().create_parameter( - name=attr_copy['name'], dtype=dtype, shape=shape) + name=attr_copy['name'], + dtype=dtype, + shape=shape, + trainable=attr_copy.get('trainable', True)) def create_tmp_variable(self, dtype): return self.main_program.current_block().create_var( diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py index abd4b22e8b68f3b5c3e961df83db34e419e7f4d5..fac91aac97267b1ecc867bb9b0b1f8fd40f2f299 100644 --- a/python/paddle/v2/fluid/layers.py +++ b/python/paddle/v2/fluid/layers.py @@ -112,6 +112,7 @@ def fc(input, def embedding(input, size, is_sparse=False, + param_initializer=None, param_attr=None, data_type='float32', main_program=None, @@ -136,9 +137,16 @@ def embedding(input, to the LayerHelper constructor. """ + + def _get_default_param_initializer(): + return XavierInitializer() + helper = LayerHelper('embedding', **locals()) w = helper.create_parameter( - attr=helper.param_attr, shape=size, dtype=data_type) + attr=helper.param_attr, + shape=size, + dtype=data_type, + initializer=param_initializer or _get_default_param_initializer()) tmp = helper.create_tmp_variable(data_type) helper.append_op( type='lookup_table', @@ -460,6 +468,41 @@ def sums(input, main_program=None, startup_program=None): return out +def linear_chain_crf(input, + label, + param_attr=None, + param_initializer=None, + main_program=None, + startup_program=None): + def _get_default_param_initializer(): + return XavierInitializer() + + helper = LayerHelper('linear_chain_crf', **locals()) + size = input.shape[1] + transition = helper.create_parameter( + attr=helper.param_attr, + shape=[size + 2, size], + dtype=helper.input_dtype(), + initializer=param_initializer or _get_default_param_initializer()) + alpha = helper.create_tmp_variable(dtype=helper.input_dtype()) + emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype()) + transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype()) + log_likelihood = helper.create_tmp_variable(dtype=helper.input_dtype()) + helper.append_op( + type='linear_chain_crf', + inputs={"Emission": [input], + "Transition": transition, + "Label": label}, + outputs={ + "Alpha": [alpha], + "EmissionExps": [emission_exps], + "TransitionExps": transition_exps, + "LogLikelihood": log_likelihood + }) + + return log_likelihood + + def assign(input, output, main_program=None, startup_program=None): helper = LayerHelper('assign', **locals()) helper.append_op( diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index d2841df6af7a0d860c239db952c767c995d30ba4..87a478c2903b77d955ebde49a4a0e507c9e9ffd3 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -170,7 +170,8 @@ class Optimizer(object): optimize_ops = [] for param_and_grad in parameters_and_grads: - if param_and_grad[1] is not None: + if param_and_grad[0].trainable is True and param_and_grad[ + 1] is not None: optimize_op = self._append_optimize_op(loss.block, param_and_grad) optimize_ops.append(optimize_op) diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py new file mode 100644 index 0000000000000000000000000000000000000000..f66e6e748b76dec53a9e24b5b352d31395ce6bde --- /dev/null +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -0,0 +1,192 @@ +import numpy as np +import paddle.v2 as paddle +import paddle.v2.dataset.conll05 as conll05 +import paddle.v2.fluid.core as core +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid.layers as layers +from paddle.v2.fluid.executor import Executor, g_scope +from paddle.v2.fluid.optimizer import SGDOptimizer + +word_dict, verb_dict, label_dict = conll05.get_dict() +word_dict_len = len(word_dict) +label_dict_len = len(label_dict) +pred_len = len(verb_dict) + +mark_dict_len = 2 +word_dim = 32 +mark_dim = 5 +hidden_dim = 512 +depth = 8 +mix_hidden_lr = 1e-3 + +IS_SPARSE = True +PASS_NUM = 10 +BATCH_SIZE = 20 + +embedding_name = 'emb' + + +def load_parameter(file_name, h, w): + with open(file_name, 'rb') as f: + f.read(16) # skip header. + return np.fromfile(f, dtype=np.float32).reshape(h, w) + + +def db_lstm(): + # 8 features + word = layers.data(name='word_data', shape=[1], data_type='int64') + predicate = layers.data(name='verb_data', shape=[1], data_type='int64') + ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], data_type='int64') + ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], data_type='int64') + ctx_0 = layers.data(name='ctx_0_data', shape=[1], data_type='int64') + ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], data_type='int64') + ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], data_type='int64') + mark = layers.data(name='mark_data', shape=[1], data_type='int64') + + predicate_embedding = layers.embedding( + input=predicate, + size=[pred_len, word_dim], + data_type='float32', + is_sparse=IS_SPARSE, + param_attr={'name': 'vemb'}) + + mark_embedding = layers.embedding( + input=mark, + size=[mark_dict_len, mark_dim], + data_type='float32', + is_sparse=IS_SPARSE) + + word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] + emb_layers = [ + layers.embedding( + size=[word_dict_len, word_dim], + input=x, + param_attr={'name': embedding_name, + 'trainable': False}) for x in word_input + ] + emb_layers.append(predicate_embedding) + emb_layers.append(mark_embedding) + + hidden_0_layers = [ + layers.fc(input=emb, size=hidden_dim) for emb in emb_layers + ] + + hidden_0 = layers.sums(input=hidden_0_layers) + + lstm_0 = layers.dynamic_lstm( + input=hidden_0, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid') + + # stack L-LSTM and R-LSTM with direct edges + input_tmp = [hidden_0, lstm_0] + + for i in range(1, depth): + mix_hidden = layers.sums(input=[ + layers.fc(input=input_tmp[0], size=hidden_dim), + layers.fc(input=input_tmp[1], size=hidden_dim) + ]) + + lstm = layers.dynamic_lstm( + input=mix_hidden, + size=hidden_dim, + candidate_activation='relu', + gate_activation='sigmoid', + cell_activation='sigmoid', + is_reverse=((i % 2) == 1)) + + input_tmp = [mix_hidden, lstm] + + feature_out = layers.sums(input=[ + layers.fc(input=input_tmp[0], size=label_dict_len), + layers.fc(input=input_tmp[1], size=label_dict_len) + ]) + + return feature_out + + +def to_lodtensor(data, place): + seq_lens = [len(seq) for seq in data] + cur_len = 0 + lod = [cur_len] + for l in seq_lens: + cur_len += l + lod.append(cur_len) + flattened_data = np.concatenate(data, axis=0).astype("int64") + flattened_data = flattened_data.reshape([len(flattened_data), 1]) + res = core.LoDTensor() + res.set(flattened_data, place) + res.set_lod([lod]) + return res + + +def main(): + # define network topology + feature_out = db_lstm() + target = layers.data(name='target', shape=[1], data_type='int64') + crf_cost = layers.linear_chain_crf( + input=feature_out, + label=target, + param_attr={"name": 'crfw', + "learning_rate": mix_hidden_lr}) + avg_cost = layers.mean(x=crf_cost) + # TODO(qiao) + # 1. add crf_decode_layer and evaluator + # 2. use other optimizer and check why out will be NAN + sgd_optimizer = SGDOptimizer(learning_rate=0.0001) + opts = sgd_optimizer.minimize(avg_cost) + + train_data = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.conll05.test(), buf_size=8192), + batch_size=BATCH_SIZE) + place = core.CPUPlace() + exe = Executor(place) + + exe.run(framework.default_startup_program()) + + embedding_param = g_scope.find_var(embedding_name).get_tensor() + embedding_param.set( + load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place) + + batch_id = 0 + for pass_id in xrange(PASS_NUM): + for data in train_data(): + word_data = to_lodtensor(map(lambda x: x[0], data), place) + ctx_n2_data = to_lodtensor(map(lambda x: x[1], data), place) + ctx_n1_data = to_lodtensor(map(lambda x: x[2], data), place) + ctx_0_data = to_lodtensor(map(lambda x: x[3], data), place) + ctx_p1_data = to_lodtensor(map(lambda x: x[4], data), place) + ctx_p2_data = to_lodtensor(map(lambda x: x[5], data), place) + verb_data = to_lodtensor(map(lambda x: x[6], data), place) + mark_data = to_lodtensor(map(lambda x: x[7], data), place) + target = to_lodtensor(map(lambda x: x[8], data), place) + + outs = exe.run(framework.default_main_program(), + feed={ + 'word_data': word_data, + 'ctx_n2_data': ctx_n2_data, + 'ctx_n1_data': ctx_n1_data, + 'ctx_0_data': ctx_0_data, + 'ctx_p1_data': ctx_p1_data, + 'ctx_p2_data': ctx_p2_data, + 'verb_data': verb_data, + 'mark_data': mark_data, + 'target': target + }, + fetch_list=[avg_cost]) + avg_cost_val = np.array(outs[0]) + + if batch_id % 10 == 0: + print("avg_cost=" + str(avg_cost_val)) + + # exit early for CI + exit(0) + + batch_id = batch_id + 1 + + +if __name__ == '__main__': + main() diff --git a/python/paddle/v2/fluid/tests/test_gru_unit_op.py b/python/paddle/v2/fluid/tests/test_gru_unit_op.py index f356f6e9ec0da2d3e1fb67638d81e8d54c544f53..501d5aa5797d6def708338692f0861657f951ef7 100644 --- a/python/paddle/v2/fluid/tests/test_gru_unit_op.py +++ b/python/paddle/v2/fluid/tests/test_gru_unit_op.py @@ -28,8 +28,8 @@ def relu(x): class TestGRUUnitOp(OpTest): - batch_size = 3 - frame_size = 5 + batch_size = 5 + frame_size = 10 activate = { GRUActivationType.identity: identity, GRUActivationType.sigmoid: sigmoid, @@ -77,7 +77,7 @@ class TestGRUUnitOp(OpTest): c = self.activate[self.attrs['activation']](np.dot(r_h_p, w_c) + g[:, frame_size * 2:]) g = np.hstack((u_r, c)) - h = u * h_p + (1 - u) * c + h = u * c + (1 - u) * h_p self.outputs = { 'Gate': g.astype('float64'), 'ResetHiddenPrev': r_h_p.astype('float64'), @@ -92,10 +92,7 @@ class TestGRUUnitOp(OpTest): self.check_output() def test_check_grad(self): - self.check_grad( - ['Input', 'HiddenPrev', 'Weight'], - ['Hidden', 'ResetHiddenPrev', 'Gate'], - max_relative_error=0.007) + self.check_grad(['Input', 'HiddenPrev', 'Weight'], ['Hidden']) class TestGRUUnitOpWithBias(TestGRUUnitOp): @@ -104,18 +101,20 @@ class TestGRUUnitOpWithBias(TestGRUUnitOp): frame_size = self.frame_size super(TestGRUUnitOpWithBias, self).set_inputs() self.inputs['Bias'] = np.random.uniform( - -0.1, 0.1, (1, frame_size * 3)).astype('float32') + -0.1, 0.1, (1, frame_size * 3)).astype('float64') self.attrs = { 'activation': GRUActivationType.identity, 'gate_activation': GRUActivationType.sigmoid } def test_check_grad(self): + self.check_grad(['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden']) + + def test_check_grad_ingore_input(self): self.check_grad( - ['Input', 'HiddenPrev', 'Weight', 'Bias'], ['Hidden'], - max_relative_error=0.007) + ['HiddenPrev', 'Weight', 'Bias'], ['Hidden'], + no_grad_set=set('Input')) if __name__ == '__main__': - exit(0) # FIXME(yuyang18): This unittest is not pass. Fix it later unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py index 3d18e7ce3a4dc6c6b917a1000de39fca71f6ac18..d3dc45742d92dc61b81d9cdc04056c5d5bdc2b63 100644 --- a/python/paddle/v2/fluid/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -1,8 +1,8 @@ +import unittest + import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets from paddle.v2.fluid.framework import Program -import paddle.v2.fluid.core as core -import unittest class TestBook(unittest.TestCase): @@ -20,7 +20,8 @@ class TestBook(unittest.TestCase): avg_cost = layers.mean(x=cost, main_program=program) self.assertIsNotNone(avg_cost) program.append_backward(avg_cost) - print str(program) + + # print str(program) def test_recognize_digits_mlp(self): program = Program() @@ -49,7 +50,7 @@ class TestBook(unittest.TestCase): input=predict, label=label, main_program=program) avg_cost = layers.mean(x=cost, main_program=program) self.assertIsNotNone(avg_cost) - print str(program) + # print str(program) def test_simple_conv2d(self): program = Program() @@ -64,7 +65,7 @@ class TestBook(unittest.TestCase): filter_size=[4, 4], main_program=program) - print str(program) + # print str(program) def test_recognize_digits_conv(self): program = Program() @@ -103,7 +104,7 @@ class TestBook(unittest.TestCase): program.append_backward(avg_cost) - print str(program) + # print str(program) def test_word_embedding(self): program = Program() @@ -164,7 +165,24 @@ class TestBook(unittest.TestCase): avg_cost = layers.mean(x=cost, main_program=program) self.assertIsNotNone(avg_cost) - print str(program) + # print str(program) + + def test_linear_chain_crf(self): + program = Program() + + # Change g_program, so the rest layers use `g_program` + images = layers.data( + name='pixel', + shape=[784], + data_type='float32', + main_program=program) + label = layers.data( + name='label', shape=[1], data_type='int32', main_program=program) + hidden = layers.fc(input=images, size=128, main_program=program) + crf = layers.linear_chain_crf( + input=hidden, label=label, main_program=program) + + # print str(program) if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py b/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py index 6f06a66c825b37ee91214efc0a29a58f0b9057f9..c26634ff20c46e484d600c758be386ec8327d1c1 100644 --- a/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py +++ b/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py @@ -104,7 +104,7 @@ class TestLinearChainCrfOp(OpTest): transition_exps = np.exp(transition) labels = np.random.randint( - low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32") + low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64") self.inputs = { "Emission": (emission, lod),