Ce language model (#2193)

* add ce for language_model * update ce * update ce

Ce language model (#2193)
* add ce for language_model * update ce * update ce
99f5b2d6 · zhengya01 · Hongyu Liu · a4a7df17 · 99f5b2d6 · 99f5b2d6
4 changed files
--- a/PaddleNLP/language_model/.run_ce.sh
+++ b/PaddleNLP/language_model/.run_ce.sh
+export CUDA_VISIBLE_DEVICES=0
+
+# Train once per RNN implementation; each training log is piped into _ce.py,
+# which reads it from stdin and records the KPI lines it finds.
+for rnn_model in static padding; do
+    python train.py \
+        --data_path data/simple-examples/data/ \
+        --model_type test \
+        --use_gpu True \
+        --rnn_model "${rnn_model}" \
+        --enable_ce | python _ce.py
+done
--- a/PaddleNLP/language_model/__init__.py
+++ b/PaddleNLP/language_model/__init__.py
--- a/PaddleNLP/language_model/_ce.py
+++ b/PaddleNLP/language_model/_ce.py
+# This file is only used by the continuous evaluation (CE) test.
+
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi
+from kpi import DurationKpi
+
+# KPIs named for the "static" RNN variant on one card.
+# NOTE(review): the numeric arguments are passed positionally to the project's
+# kpi classes; their exact meaning is defined in the `kpi` module -- confirm there.
+imikolov_20_avg_ppl_kpi_card1 = CostKpi(
+    'lstm_language_model_static_loss_card1', 0.01, 0)
+imikolov_20_pass_duration_kpi_card1 = DurationKpi(
+    'lstm_language_model_static_duration_card1', 0.03, 0, actived=True)
+
+# KPIs named for the "padding" RNN variant on one card.
+imikolov_20_avg_ppl_kpi_card1_padding = CostKpi(
+    'lstm_language_model_padding_loss_card1', 0.01, 0)
+imikolov_20_pass_duration_kpi_card1_padding = DurationKpi(
+    'lstm_language_model_padding_duration_card1', 0.03, 0, actived=True)
+
+# Every KPI that log_to_ce can record; it looks them up by their `name`.
+tracking_kpis = [
+    imikolov_20_avg_ppl_kpi_card1,
+    imikolov_20_pass_duration_kpi_card1,
+    imikolov_20_avg_ppl_kpi_card1_padding,
+    imikolov_20_pass_duration_kpi_card1_padding,
+]
+
+
+def parse_log(log):
+    '''
+    Yield (kpi_name, kpi_value) pairs found in a training log.
+
+    The log is split on newlines. Each line is stripped and split on tabs,
+    and the resulting field list is printed. A line is treated as a KPI
+    record only when it has exactly three tab-separated fields and the
+    first one is "ptblm":
+
+        ptblm<TAB>kpi_name<TAB>kpi_value
+
+    Args:
+        log: full text of the training log.
+
+    Yields:
+        Tuples of (kpi_name, kpi_value) where kpi_value is the third field
+        converted with float().
+    '''
+    for raw_line in log.split('\n'):
+        fields = raw_line.strip().split('\t')
+        print(fields)
+        if len(fields) != 3:
+            continue
+        tag, kpi_name, raw_value = fields
+        if tag != 'ptblm':
+            continue
+        yield kpi_name, float(raw_value)
+
+
+def log_to_ce(log):
+    '''
+    Record every KPI found in a training log.
+
+    Each (name, value) pair produced by parse_log(log) is printed, added to
+    the KPI object in tracking_kpis whose `name` matches, and that KPI is
+    persisted immediately afterwards.
+
+    Args:
+        log: full text of the training log.
+
+    Raises:
+        KeyError: if the log reports a KPI name that is not listed in
+            tracking_kpis. The message names the offending KPI and the
+            tracked ones so a naming mismatch is easy to spot.
+    '''
+    kpi_tracker = {kpi.name: kpi for kpi in tracking_kpis}
+
+    for kpi_name, kpi_value in parse_log(log):
+        print(kpi_name, kpi_value)
+        if kpi_name not in kpi_tracker:
+            # Same exception type as a plain dict lookup, but with enough
+            # context to diagnose a mismatch between the names the training
+            # script prints and the names tracked here.
+            raise KeyError('unknown KPI %r in log; tracked KPIs: %s' %
+                           (kpi_name, sorted(kpi_tracker)))
+        kpi = kpi_tracker[kpi_name]
+        kpi.add_record(kpi_value)
+        kpi.persist()
+
+
+if __name__ == '__main__':
+    # Read the whole training log from stdin and record its KPIs.
+    log_to_ce(sys.stdin.read())
--- a/PaddleNLP/language_model/train.py
+++ b/PaddleNLP/language_model/train.py
@@ -286,9 +286,10 @@ def train():
        print("train ppl", ppl[0])

        if epoch_id == max_epoch - 1 and args.enable_ce:
-            print("ptblm\tlstm_language_model_duration\t%s" %
-                        (total_time / max_epoch))
-            print("ptblm\tlstm_language_model_loss\t%s" % ppl[0])
+            card_num = get_cards()
+            print("ptblm\tlstm_language_model_duration_card%d\t%s" %
+                        (card_num, total_time / max_epoch))
+            print("ptblm\tlstm_language_model_loss_card%d\t%s" % (card_num, ppl[0]))

        model_path = os.path.join("model_new/", str(epoch_id))
        if not os.path.isdir(model_path):
@@ -301,5 +302,13 @@ def train():
    print("test ppl", test_ppl[0])


+def get_cards():
+    """Return how many GPU ids CUDA_VISIBLE_DEVICES lists (0 if unset or empty)."""
+    cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
+    # Ignore blank entries so values such as "0," or " " are not over-counted;
+    # a plain len(cards.split(",")) would count every comma-separated slot.
+    return len([card for card in cards.split(',') if card.strip()])
+
+
 if __name__ == '__main__':
    train()