From 27d556df735273df877595b5a7d5a607bb958828 Mon Sep 17 00:00:00 2001
From: zhengya01 <43601548+zhengya01@users.noreply.github.com>
Date: Wed, 13 Nov 2019 19:58:22 +0800
Subject: [PATCH] add ce for xlnet (#3900)

* update .run_ce.sh

* chmod +x .run_ce.sh

* add ce

* add ce

* add ce

* add ce

* add ce for xlnet

* add ce for xlnet

* add xlnet ce
---
 PaddleNLP/PaddleLARK/XLNet/.run_ce.sh        | 31 +++++++++
 PaddleNLP/PaddleLARK/XLNet/_ce.py            | 67 ++++++++++++++++++++
 PaddleNLP/PaddleLARK/XLNet/run_classifier.py | 11 ++--
 PaddleNLP/PaddleLARK/XLNet/utils/cards.py    | 26 ++++++++
 4 files changed, 128 insertions(+), 7 deletions(-)
 create mode 100644 PaddleNLP/PaddleLARK/XLNet/.run_ce.sh
 create mode 100644 PaddleNLP/PaddleLARK/XLNet/_ce.py
 create mode 100644 PaddleNLP/PaddleLARK/XLNet/utils/cards.py

diff --git a/PaddleNLP/PaddleLARK/XLNet/.run_ce.sh b/PaddleNLP/PaddleLARK/XLNet/.run_ce.sh
new file mode 100644
index 00000000..7fa5fc82
--- /dev/null
+++ b/PaddleNLP/PaddleLARK/XLNet/.run_ce.sh
@@ -0,0 +1,31 @@
+
+train(){  # short STS-B fine-tuning run; with --enable_ce run_classifier.py prints the "kpis" lines
+python run_classifier.py \
+    --data_dir data/STS-B \
+    --verbose True \
+    --shuffle false \
+    --init_checkpoint xlnet_cased_L-12_H-768_A-12/params \
+    --predict_dir exp/sts-b \
+    --model_config_path xlnet_cased_L-12_H-768_A-12/xlnet_config.json \
+    --uncased False \
+    --save_steps 50 \
+    --train_steps 50 \
+    --epoch 1 \
+    --skip_steps 10 \
+    --validation_steps 30 \
+    --task_name sts-b \
+    --warmup_steps 5 \
+    --random_seed 100 \
+    --spiece_model_file xlnet_cased_L-12_H-768_A-12/spiece.model \
+    --checkpoints checkpoints_sts-b \
+    --is_regression True \
+    --use_cuda True \
+    --eval_batch_size 4 \
+    --enable_ce
+}
+
+export CUDA_VISIBLE_DEVICES=0  # one visible device -> KPI names end in _card1
+train | python _ce.py  # _ce.py reads the training output from stdin
+
+export CUDA_VISIBLE_DEVICES=0,1,2,3  # four visible devices -> KPI names end in _card4
+train | python _ce.py  # same pipeline, second run
diff --git a/PaddleNLP/PaddleLARK/XLNet/_ce.py b/PaddleNLP/PaddleLARK/XLNet/_ce.py
new file mode 100644
index 00000000..09443427
--- /dev/null
+++ b/PaddleNLP/PaddleLARK/XLNet/_ce.py
@@ -0,0 +1,67 @@
+#### This file is only used for the continuous evaluation (CE) test!
+
+import os
+import sys
+sys.path.insert(0, os.environ['ceroot'])
+from kpi import CostKpi, DurationKpi, AccKpi
+
+#### NOTE: kpi.py should be shared among the models in some way!
+
+
+train_duration_sts_b_card1 = DurationKpi(  # KPI names must equal the names printed on "kpis" lines
+    'train_duration_sts_b_card1', 0.01, 0, actived=True)
+train_cost_sts_b_card1 = CostKpi(
+    'train_cost_sts_b_card1', 0.02, 0, actived=True)
+train_duration_sts_b_card4 = DurationKpi(
+    'train_duration_sts_b_card4', 0.04, 0, actived=True)
+train_cost_sts_b_card4 = CostKpi(
+    'train_cost_sts_b_card4', 0.08, 0, actived=False)  # NOTE(review): only KPI with actived=False; confirm intended
+
+tracking_kpis = [  # log_to_ce() indexes these objects by their .name
+    train_duration_sts_b_card1,
+    train_cost_sts_b_card1,
+    train_duration_sts_b_card4,
+    train_cost_sts_b_card4,
+]
+
+
+def parse_log(log):
+    '''
+    Yield (kpi_name, kpi_value) pairs found in the captured training log.
+    The log is split on newlines; each line is stripped and split on tabs.
+    Only lines with exactly three fields whose first field is 'kpis' are
+    used, for example:
+    "
+    kpis\ttrain_cost_sts_b_card1\t1.0
+    kpis\ttrain_duration_sts_b_card1\t2.5
+    "
+    The third field is converted with float(); all other lines are skipped.
+    Every split line is also echoed to stdout.
+    '''
+    for line in log.split('\n'):
+        fs = line.strip().split('\t')
+        print(fs)  # echo of every split line, matching or not
+        if len(fs) == 3 and fs[0] == 'kpis':
+            print("-----%s" % fs)  # marks a line recognised as a KPI record
+            kpi_name = fs[1]
+            kpi_value = float(fs[2])  # raises ValueError if the value is not numeric
+            yield kpi_name, kpi_value
+
+
+def log_to_ce(log):
+    kpi_tracker = {}  # KPI name -> KPI object, built from tracking_kpis
+    for kpi in tracking_kpis:
+        kpi_tracker[kpi.name] = kpi
+
+    for (kpi_name, kpi_value) in parse_log(log):  # one (name, value) pair per "kpis" line in the log
+        print(kpi_name, kpi_value)
+        kpi_tracker[kpi_name].add_record(kpi_value)  # KeyError if the name is not in tracking_kpis
+        kpi_tracker[kpi_name].persist()  # called after every record, not once at the end
+
+
+if __name__ == '__main__':
+    log = sys.stdin.read()  # whole training output; .run_ce.sh pipes it in via `train | python _ce.py`
+    print("*****")
+    print(log)  # echo the raw log between the marker lines
+    print("****")
+    log_to_ce(log)
diff --git a/PaddleNLP/PaddleLARK/XLNet/run_classifier.py b/PaddleNLP/PaddleLARK/XLNet/run_classifier.py
index 8b5a0c80..795eae54 100644
--- a/PaddleNLP/PaddleLARK/XLNet/run_classifier.py
+++ b/PaddleNLP/PaddleLARK/XLNet/run_classifier.py
@@ -41,6 +41,7 @@ from model.classifier import create_model
 from optimization import optimization
 from utils.args import ArgumentGroup, print_arguments, check_cuda
 from utils.init import init_pretraining_params, init_checkpoint
+from utils.cards import get_cards
 
 num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
 
@@ -432,20 +433,16 @@ def main(args):
         if args.enable_ce:
             card_num = get_cards()
             ce_cost = 0
-            ce_acc = 0
             ce_time = 0
             try:
                 ce_cost = ce_info[-2][0]
-                ce_acc = ce_info[-2][1]
-                ce_time = ce_info[-2][2]
+                ce_time = ce_info[-2][1]
             except:
                 print("ce info error")
             print("kpis\ttrain_duration_%s_card%s\t%s" %
-                (args.task_name, card_num, ce_time))
+                (args.task_name.replace("-", "_"), card_num, ce_time))
             print("kpis\ttrain_cost_%s_card%s\t%f" %
-                (args.task_name, card_num, ce_cost))
-            print("kpis\ttrain_acc_%s_card%s\t%f" %
-                (args.task_name, card_num, ce_acc))
+                (args.task_name.replace("-", "_"), card_num, ce_cost))
 
 
     # final eval on dev set
diff --git a/PaddleNLP/PaddleLARK/XLNet/utils/cards.py b/PaddleNLP/PaddleLARK/XLNet/utils/cards.py
new file mode 100644
index 00000000..70c58ee3
--- /dev/null
+++ b/PaddleNLP/PaddleLARK/XLNet/utils/cards.py
@@ -0,0 +1,26 @@
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+
+def get_cards():
+    """
+    Return how many comma-separated entries CUDA_VISIBLE_DEVICES holds (0 if unset or empty).
+    """
+    num = 0
+    cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
+    if cards != '':
+        num = len(cards.split(","))  # counts entries only; the device ids are not validated
+    return num
-- 
GitLab