diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index c0a6df831acbfe2654a5941cf95c91343992ef13..4d8bca4d2430a248ccf421572bdafdffc3a3003a 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -37,7 +37,7 @@ depth = 8 mix_hidden_lr = 1e-3 IS_SPARSE = True -PASS_NUM = 10 +PASS_NUM = 100 BATCH_SIZE = 10 embedding_name = 'emb' @@ -77,7 +77,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, emb_layers.append(mark_embedding) hidden_0_layers = [ - fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers + fluid.layers.fc(input=emb, size=hidden_dim, act='tanh') + for emb in emb_layers ] hidden_0 = fluid.layers.sums(input=hidden_0_layers) @@ -94,8 +95,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, for i in range(1, depth): mix_hidden = fluid.layers.sums(input=[ - fluid.layers.fc(input=input_tmp[0], size=hidden_dim), - fluid.layers.fc(input=input_tmp[1], size=hidden_dim) + fluid.layers.fc(input=input_tmp[0], size=hidden_dim, act='tanh'), + fluid.layers.fc(input=input_tmp[1], size=hidden_dim, act='tanh') ]) lstm = fluid.layers.dynamic_lstm( @@ -109,8 +110,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, input_tmp = [mix_hidden, lstm] feature_out = fluid.layers.sums(input=[ - fluid.layers.fc(input=input_tmp[0], size=label_dict_len), - fluid.layers.fc(input=input_tmp[1], size=label_dict_len) + fluid.layers.fc(input=input_tmp[0], size=label_dict_len, act='tanh'), + fluid.layers.fc(input=input_tmp[1], size=label_dict_len, act='tanh') ]) return feature_out @@ -171,7 +172,7 @@ def train(use_cuda, save_dirname=None, is_local=True): # check other optimizers and check why out will be NAN sgd_optimizer = fluid.optimizer.SGD( learning_rate=fluid.layers.exponential_decay( - learning_rate=0.0001, + learning_rate=0.01, decay_steps=100000, decay_rate=0.5, staircase=True)) @@ -233,7 +234,7 @@ def train(use_cuda, save_dirname=None, is_local=True): print("second per batch: " + str((time.time( ) - start_time) / batch_id)) # Set the threshold low to speed up the CI test - if float(pass_precision) > 0.05: + if float(pass_precision) > 0.01: if save_dirname is not None: # TODO(liuyiqun): Change the target to crf_decode fluid.io.save_inference_model(save_dirname, [