Unverified commit 87ff95d9, authored by Siddharth Goyal, committed by GitHub

[Test-driven] Implementing label_semantic_role with new API (#10757)

* Update label role implementation to use new API

* Try trainable embedding layer

* Fix feed order

* Add infer call
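
For context, the high-level API this change migrates to splits a model into a `train_program` that returns its losses, a `fluid.Trainer` driven by a `feed_order` of data-layer names, and a `fluid.Inferencer` for deployment. Below is a minimal, self-contained sketch of that pattern. It mirrors the `fluid.Trainer` calls used in this diff, but the toy network, reader, and names (`example_train_program`, `random_reader`) are illustrative only and not part of this commit:

```python
import numpy as np
import paddle
import paddle.fluid as fluid


def example_train_program():
    # A train program declares its inputs with fluid.layers.data() and
    # returns the list of losses/metrics the Trainer should optimize.
    x = fluid.layers.data(name='x', shape=[1], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    return [fluid.layers.mean(cost)]


def random_reader():
    # Each sample is a tuple whose positions line up with feed_order below;
    # this positional pairing is what the "Fix feed order" change is about.
    for _ in range(16):
        x = np.random.random((1, )).astype('float32')
        yield x, 2.0 * x


place = fluid.CPUPlace()
trainer = fluid.Trainer(
    train_func=example_train_program,
    place=place,
    optimizer=fluid.optimizer.SGD(learning_rate=0.01))
trainer.train(
    num_epochs=1,
    event_handler=lambda event: None,  # real code inspects End*Event here
    reader=paddle.batch(random_reader, batch_size=4),
    feed_order=['x', 'y'])
```

The Trainer matches each position of a reader tuple to the `fluid.layers.data()` variable named at the same position of `feed_order`, so the order of names must follow the reader's sample layout, not the order the layers were declared in.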
Parent 174d884d
@@ -10,5 +10,6 @@ add_subdirectory(fit_a_line)
 add_subdirectory(recognize_digits)
 add_subdirectory(image_classification)
 add_subdirectory(understand_sentiment)
+add_subdirectory(label_semantic_roles)
 add_subdirectory(word2vec)
 add_subdirectory(recommender_system)
+file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
+string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
+
+# default test
+foreach(src ${TEST_OPS})
+    py_test(${src} SRCS ${src}.py)
+endforeach()
@@ -16,21 +16,23 @@ from __future__ import print_function

 import paddle
 import paddle.fluid as fluid
-import numpy
+import numpy as np

 WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict()
 WORD_DICT_LEN = len(WORD_DICT)
 LABEL_DICT_LEN = len(LABEL_DICT)
 PRED_DICT_LEN = len(VERB_DICT)
 MARK_DICT_LEN = 2
+IS_SPARSE = True
+BATCH_SIZE = 10
+EMBEDDING_NAME = 'emb'


-def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
+def lstm_net():
     WORD_DIM = 32
     MARK_DIM = 5
     HIDDEN_DIM = 512
     DEPTH = 8
-    EMBEDDING_NAME = 'emb'

     # Data definitions
     word = fluid.layers.data(
@@ -69,8 +71,9 @@ def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
         fluid.layers.embedding(
             size=[WORD_DICT_LEN, WORD_DIM],
             input=x,
-            param_attr=fluid.ParamAttr(
-                name=EMBEDDING_NAME, trainable=False)) for x in word_input
+            param_attr=fluid.ParamAttr(name=EMBEDDING_NAME))
+        for x in word_input
+        #name=EMBEDDING_NAME, trainable=False)) for x in word_input
     ]
     emb_layers.append(predicate_embedding)
     emb_layers.append(mark_embedding)
@@ -116,21 +119,16 @@ def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
     return feature_out


-def inference_network():
-    predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
-                       mark)
-    crf_decode = fluid.layers.crf_decoding(
-        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
-    return crf_decode
+def inference_program():
+    predict = lstm_net()
+    return predict


-def train_network():
+def train_program():
     MIX_HIDDEN_LR = 1e-3

-    predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
-                       mark)
+    predict = lstm_net()
     target = fluid.layers.data(
         name='target', shape=[1], dtype='int64', lod_level=1)
     crf_cost = fluid.layers.linear_chain_crf(
@@ -140,44 +138,66 @@ def train_network():
             name='crfw', learning_rate=MIX_HIDDEN_LR))
     avg_cost = fluid.layers.mean(crf_cost)

-    return avg_cost
+    return [avg_cost]


-def train(use_cuda, save_path):
-    BATCH_SIZE = 128
-    EPOCH_NUM = 1
-
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.conll05.train(), buf_size=8192),
-        batch_size=BATCH_SIZE)
-    test_reader = paddle.batch(
-        paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
-
-    def event_handler(event):
-        if isinstance(event, fluid.EndIteration):
-            if (event.batch_id % 10) == 0:
-                avg_cost = trainer.test(reader=test_reader)
-                print('BatchID {0:04}, Loss {1:2.2}'.format(event.batch_id + 1,
-                                                            avg_cost))
-                if avg_cost > 0.01:  # Low threshold for speeding up CI
-                    trainer.save_params(save_path)
-                    return
-
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    sgd_optimizer = fluid.optimizer.SGD(
-        learning_rate=fluid.layers.exponential_decay(
-            learning_rate=0.01,
-            decay_steps=100000,
-            decay_rate=0.5,
-            staircase=True))
-    trainer = fluid.Trainer(train_network, optimizer=sgd_optimizer, place=place)
-    trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler)
+def train(use_cuda, train_program, save_path):
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    optimizer = fluid.optimizer.SGD(learning_rate=0.01)
+
+    trainer = fluid.Trainer(
+        train_func=train_program, place=place, optimizer=optimizer)
+
+    feed_order = [
+        'word_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
+        'ctx_p2_data', 'verb_data', 'mark_data', 'target'
+    ]
+
+    #embedding_param = fluid.global_scope().find_var(
+    #    EMBEDDING_NAME).get_tensor()
+    #embedding_param.set(
+    #    load_parameter(conll05.get_embedding(), WORD_DICT_LEN, WORD_DIM),
+    #    place)
+
+    def event_handler(event):
+        if isinstance(event, fluid.EndEpochEvent):
+            test_reader = paddle.batch(
+                paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
+            avg_cost_set = trainer.test(
+                reader=test_reader, feed_order=feed_order)
+
+            # get avg cost
+            avg_cost = np.array(avg_cost_set).mean()
+
+            print("avg_cost: %s" % avg_cost)
+
+            if float(avg_cost) < 100.0:  # Large value to increase CI speed
+                trainer.save_params(save_path)
+            else:
+                print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
+                                                              float(avg_cost)))
+                if math.isnan(float(avg_cost)):
+                    sys.exit("got NaN loss, training failed.")
+        elif isinstance(event, fluid.EndStepEvent):
+            print("Step {0}, Epoch {1} Metrics {2}".format(
+                event.step, event.epoch, map(np.array, event.metrics)))
+            if event.step == 1:  # Run 2 iterations to speed CI
+                trainer.save_params(save_path)
+                trainer.stop()
+
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.conll05.test(), buf_size=8192),
+        batch_size=BATCH_SIZE)
+    trainer.train(
+        num_epochs=1,
+        event_handler=event_handler,
+        reader=train_reader,
+        feed_order=feed_order)


-def infer(use_cuda, save_path):
+def infer(use_cuda, inference_program, save_path):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     inferencer = fluid.Inferencer(
         inference_program, param_path=save_path, place=place)
@@ -201,7 +221,8 @@ def infer(use_cuda, save_path):
     ctx_p2 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
     mark = create_random_lodtensor(lod, place, low=0, high=MARK_DICT_LEN - 1)

-    results = inferencer.infer({
+    results = inferencer.infer(
+        {
         'word_data': word,
         'verb_data': pred,
         'ctx_n2_data': ctx_n2,
@@ -210,17 +231,18 @@ def infer(use_cuda, save_path):
         'ctx_p1_data': ctx_p1,
         'ctx_p2_data': ctx_p2,
         'mark_data': mark
-    })
+        },
+        return_numpy=False)

-    print("infer results: ", results)
+    print("infer results: ", np.array(results[0]))


 def main(use_cuda):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
     save_path = "label_semantic_roles.inference.model"
-    train(use_cuda, save_path)
-    infer(use_cuda, save_path)
+    train(use_cuda, train_program, save_path)
+    infer(use_cuda, inference_program, save_path)


 if __name__ == '__main__':
...
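
The inference code above calls a `create_random_lodtensor` helper defined elsewhere in the file, outside the hunks shown. A plausible sketch of such a helper, assuming a single level of LoD given in offset form (e.g. `lod = [0, 4, 10]` for two sequences of lengths 4 and 6) and `int64` ids as the `fluid.layers.data(..., lod_level=1)` inputs expect; the body here is an assumption, not the commit's actual code:

```python
import numpy as np
import paddle.fluid as fluid


def create_random_lodtensor(lod, place, low, high):
    # The total number of ids is the last LoD offset; draw them uniformly
    # from [low, high] as a column vector, matching shape=[1] data layers.
    data = np.random.randint(low, high + 1, size=[lod[-1], 1]).astype('int64')
    res = fluid.LoDTensor()
    res.set(data, place)
    res.set_lod([lod])  # one level of offsets wrapped in a list
    return res


# Illustrative usage mirroring infer() above (WORD_DICT_LEN as defined there):
# lod = [0, 4, 10]
# word = create_random_lodtensor(
#     lod, fluid.CPUPlace(), low=0, high=WORD_DICT_LEN - 1)
```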