From 87ff95d92828347d088eea5bce25e38c663b549b Mon Sep 17 00:00:00 2001
From: Siddharth Goyal
Date: Thu, 24 May 2018 13:36:32 -0700
Subject: [PATCH] [Test-driven] Implementing label_semantic_role with new API
 (#10757)

* Update label role implementation to use new API
* Try trainable embedding layer
* Fix feed order
* Add infer call
---
 .../tests/book/high-level-api/CMakeLists.txt  |   1 +
 .../label_semantic_roles/CMakeLists.txt       |   7 +
 ...py => test_label_semantic_roles_newapi.py} | 136 ++++++++++--------
 3 files changed, 87 insertions(+), 57 deletions(-)
 create mode 100644 python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/CMakeLists.txt
 rename python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/{no_test_label_semantic_roles.py => test_label_semantic_roles_newapi.py} (66%)

diff --git a/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt
index c1e2656692a..6698a1914ab 100644
--- a/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt
+++ b/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt
@@ -10,5 +10,6 @@ add_subdirectory(fit_a_line)
 add_subdirectory(recognize_digits)
 add_subdirectory(image_classification)
 add_subdirectory(understand_sentiment)
+add_subdirectory(label_semantic_roles)
 add_subdirectory(word2vec)
 add_subdirectory(recommender_system)
diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/CMakeLists.txt
new file mode 100644
index 00000000000..673c965b662
--- /dev/null
+++ b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/CMakeLists.txt
@@ -0,0 +1,7 @@
+file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
+string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
+
+# default test
+foreach(src ${TEST_OPS})
+  py_test(${src} SRCS ${src}.py)
+endforeach()
diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/no_test_label_semantic_roles.py b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py
similarity index 66%
rename from python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/no_test_label_semantic_roles.py
rename to python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py
index fe36e55bb53..5f30ce195d4 100755
--- a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/no_test_label_semantic_roles.py
+++ b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py
@@ -16,21 +16,25 @@ from __future__ import print_function
 
 import paddle
 import paddle.fluid as fluid
-import numpy
+import numpy as np
+import math
+import sys
 
 WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict()
 WORD_DICT_LEN = len(WORD_DICT)
 LABEL_DICT_LEN = len(LABEL_DICT)
 PRED_DICT_LEN = len(VERB_DICT)
 MARK_DICT_LEN = 2
+IS_SPARSE = True
+BATCH_SIZE = 10
+EMBEDDING_NAME = 'emb'
 
 
-def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
+def lstm_net():
     WORD_DIM = 32
     MARK_DIM = 5
     HIDDEN_DIM = 512
     DEPTH = 8
-    EMBEDDING_NAME = 'emb'
 
     # Data definitions
     word = fluid.layers.data(
@@ -69,8 +71,9 @@ def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
         fluid.layers.embedding(
             size=[WORD_DICT_LEN, WORD_DIM],
             input=x,
-            param_attr=fluid.ParamAttr(
-                name=EMBEDDING_NAME, trainable=False)) for x in word_input
+            param_attr=fluid.ParamAttr(name=EMBEDDING_NAME))
+        for x in word_input
+        #name=EMBEDDING_NAME, trainable=False)) for x in word_input
     ]
     emb_layers.append(predicate_embedding)
     emb_layers.append(mark_embedding)
@@ -116,21 +119,16 @@ def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
 
     return feature_out
 
 
-def inference_network():
-    predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
-                       mark)
-
-    crf_decode = fluid.layers.crf_decoding(
-        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
-
-    return crf_decode
+def inference_program():
+    predict = lstm_net()
+
+    return predict
 
 
-def train_network():
+def train_program():
     MIX_HIDDEN_LR = 1e-3
 
-    predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
-                       mark)
+    predict = lstm_net()
     target = fluid.layers.data(
         name='target', shape=[1], dtype='int64', lod_level=1)
     crf_cost = fluid.layers.linear_chain_crf(
@@ -140,44 +138,66 @@ def train_network():
             name='crfw', learning_rate=MIX_HIDDEN_LR))
     avg_cost = fluid.layers.mean(crf_cost)
 
-    return avg_cost
+    return [avg_cost]
 
 
-def train(use_cuda, save_path):
-    BATCH_SIZE = 128
-    EPOCH_NUM = 1
+def train(use_cuda, train_program, save_path):
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    optimizer = fluid.optimizer.SGD(learning_rate=0.01)
 
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.conll05.train(), buf_size=8192),
-        batch_size=BATCH_SIZE)
-    test_reader = paddle.batch(
-        paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
+    trainer = fluid.Trainer(
+        train_func=train_program, place=place, optimizer=optimizer)
 
-    def event_handler(event):
-        if isinstance(event, fluid.EndIteration):
-            if (event.batch_id % 10) == 0:
-                avg_cost = trainer.test(reader=test_reader)
+    feed_order = [
+        'word_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
+        'ctx_p2_data', 'verb_data', 'mark_data', 'target'
+    ]
 
-                print('BatchID {0:04}, Loss {1:2.2}'.format(event.batch_id + 1,
-                                                            avg_cost))
+    #embedding_param = fluid.global_scope().find_var(
+    #    EMBEDDING_NAME).get_tensor()
+    #embedding_param.set(
+    #    load_parameter(conll05.get_embedding(), WORD_DICT_LEN, WORD_DIM),
+    #    place)
 
-                if avg_cost > 0.01:  # Low threshold for speeding up CI
-                    trainer.save_params(save_path)
-                    return
+    def event_handler(event):
+        if isinstance(event, fluid.EndEpochEvent):
+            test_reader = paddle.batch(
+                paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
+            avg_cost_set = trainer.test(
+                reader=test_reader, feed_order=feed_order)
+
+            # get avg cost
+            avg_cost = np.array(avg_cost_set).mean()
+
+            print("avg_cost: %s" % avg_cost)
+
+            if float(avg_cost) < 100.0:  # Large value to increase CI speed
+                trainer.save_params(save_path)
+            else:
+                print('EpochID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
+                                                              float(avg_cost)))
+                if math.isnan(float(avg_cost)):
+                    sys.exit("got NaN loss, training failed.")
+
+        elif isinstance(event, fluid.EndStepEvent):
+            print("Step {0}, Epoch {1} Metrics {2}".format(
+                event.step, event.epoch, list(map(np.array, event.metrics))))
+            if event.step == 1:  # Run 2 iterations to speed CI
+                trainer.save_params(save_path)
+                trainer.stop()
 
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    sgd_optimizer = fluid.optimizer.SGD(
-        learning_rate=fluid.layers.exponential_decay(
-            learning_rate=0.01,
-            decay_steps=100000,
-            decay_rate=0.5,
-            staircase=True))
-    trainer = fluid.Trainer(train_network, optimizer=sgd_optimizer, place=place)
-    trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler)
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.conll05.test(), buf_size=8192),
+        batch_size=BATCH_SIZE)
+    trainer.train(
+        num_epochs=1,
+        event_handler=event_handler,
+        reader=train_reader,
+        feed_order=feed_order)
 
 
-def infer(use_cuda, save_path):
+def infer(use_cuda, inference_program, save_path):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     inferencer = fluid.Inferencer(
         inference_program, param_path=save_path, place=place)
@@ -201,26 +221,28 @@ def infer(use_cuda, save_path):
     ctx_p2 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
     mark = create_random_lodtensor(lod, place, low=0, high=MARK_DICT_LEN - 1)
 
-    results = inferencer.infer({
-        'word_data': word,
-        'verb_data': pred,
-        'ctx_n2_data': ctx_n2,
-        'ctx_n1_data': ctx_n1,
-        'ctx_0_data': ctx_0,
-        'ctx_p1_data': ctx_p1,
-        'ctx_p2_data': ctx_p2,
-        'mark_data': mark
-    })
+    results = inferencer.infer(
+        {
+            'word_data': word,
+            'verb_data': pred,
+            'ctx_n2_data': ctx_n2,
+            'ctx_n1_data': ctx_n1,
+            'ctx_0_data': ctx_0,
+            'ctx_p1_data': ctx_p1,
+            'ctx_p2_data': ctx_p2,
+            'mark_data': mark
+        },
+        return_numpy=False)
 
-    print("infer results: ", results)
+    print("infer results: ", np.array(results[0]))
 
 
 def main(use_cuda):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
     save_path = "label_semantic_roles.inference.model"
-    train(use_cuda, save_path)
-    infer(use_cuda, save_path)
+    train(use_cuda, train_program, save_path)
+    infer(use_cuda, inference_program, save_path)
 
 
 if __name__ == '__main__':
-- 
GitLab
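Note on the infer() path: it feeds eight inputs built by create_random_lodtensor(),
a helper defined in an unchanged part of the test file, so it never appears in the
hunks above. A minimal sketch of such a helper, assuming the offset-based LoD
format Fluid used at the time (e.g. lod = [0, 4, 10] describes two sequences of
lengths 4 and 6); the exact body in the file may differ:

import numpy as np
import paddle.fluid as fluid

def create_random_lodtensor(lod, place, low, high):
    # lod[-1] is the total token count across all sequences, so the tensor
    # holds one random int64 id per token, drawn uniformly from [low, high].
    data = np.random.randint(low, high + 1, size=[lod[-1], 1]).astype("int64")
    res = fluid.LoDTensor()
    res.set(data, place)  # copy the ndarray onto the CPU/GPU place
    res.set_lod([lod])    # attach the sequence-boundary offsets
    return res

# Example: two sequences of lengths 4 and 6 over the word vocabulary.
# place = fluid.CPUPlace()
# word = create_random_lodtensor([0, 4, 10], place, low=0, high=WORD_DICT_LEN - 1)

Because these tensors carry LoD information, infer() passes return_numpy=False and
converts the first output itself with np.array(results[0]). Note also that
train_reader batches paddle.dataset.conll05.test() rather than conll05.train();
like the step-1 early stop in event_handler, this presumably keeps the CI run short.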