add ce for xlnet (#3900)

* update .run_ce.sh * chmod +x .run_ce.sh * add ce * add ce * add ce * add ce * add ce for xlnet * add ce for xlnet * add xlnet ce

add ce for xlnet (#3900)
* update .run_ce.sh * chmod +x .run_ce.sh * add ce * add ce * add ce * add ce * add ce for xlnet * add ce for xlnet * add xlnet ce
27d556df · zhengya01 · kolinwei · f087880c · 27d556df · 27d556df
4 changed files
--- a/PaddleNLP/PaddleLARK/XLNet/.run_ce.sh
+++ b/PaddleNLP/PaddleLARK/XLNet/.run_ce.sh
+
+# train: run a short XLNet fine-tuning job on the STS-B data (task sts-b,
+# 50 training steps, fixed random seed 100, shuffling disabled) with
+# --enable_ce set. Its stdout is piped into _ce.py by the invocations
+# further down in this script. The number of GPUs used is not set here;
+# the caller exports CUDA_VISIBLE_DEVICES before calling train.
+train(){
+python run_classifier.py \
+    --data_dir data/STS-B \
+    --verbose True \
+    --shuffle false \
+    --init_checkpoint xlnet_cased_L-12_H-768_A-12/params \
+    --predict_dir exp/sts-b \
+    --model_config_path xlnet_cased_L-12_H-768_A-12/xlnet_config.json \
+    --uncased False \
+    --save_steps 50 \
+    --train_steps 50 \
+    --epoch 1 \
+    --skip_steps 10 \
+    --validation_steps 30 \
+    --task_name sts-b \
+    --warmup_steps 5 \
+    --random_seed 100 \
+    --spiece_model_file xlnet_cased_L-12_H-768_A-12/spiece.model \
+    --checkpoints checkpoints_sts-b \
+    --is_regression True \
+    --use_cuda True \
+    --eval_batch_size 4 \
+    --enable_ce
+}
+
+# Single-card run: one entry in CUDA_VISIBLE_DEVICES; the training log is
+# piped into _ce.py, which tracks the *_card1 KPIs.
+export CUDA_VISIBLE_DEVICES=0
+train | python _ce.py
+
+# Four-card run: same training command with four visible devices; _ce.py
+# tracks the *_card4 KPIs for this one.
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+train | python _ce.py
--- a/PaddleNLP/PaddleLARK/XLNet/_ce.py
+++ b/PaddleNLP/PaddleLARK/XLNet/_ce.py
+#### This file is only used for continuous evaluation (CE) tests!
+
+import os
+import sys
+sys.path.insert(0, os.environ['ceroot'])
+from kpi import CostKpi, DurationKpi, AccKpi
+
+#### NOTE: kpi.py should be shared across models in some way!
+
+
+# KPI trackers for the STS-B continuous-evaluation runs. The names must match
+# the "kpis\t<name>\t<value>" records found in the training log: one duration
+# and one cost KPI for the 1-card run and for the 4-card run.
+# NOTE(review): the numeric arguments are passed through to kpi.py unchanged;
+# their meaning is defined there and is not visible from this file.
+tracking_kpis = [
+    DurationKpi('train_duration_sts_b_card1', 0.01, 0, actived=True),
+    CostKpi('train_cost_sts_b_card1', 0.02, 0, actived=True),
+    DurationKpi('train_duration_sts_b_card4', 0.04, 0, actived=True),
+    # NOTE(review): the only KPI created with actived=False -- confirm that
+    # this is intentional.
+    CostKpi('train_cost_sts_b_card4', 0.08, 0, actived=False),
+]
+
+# Keep each tracker reachable under its own module-level name as well.
+(train_duration_sts_b_card1, train_cost_sts_b_card1,
+ train_duration_sts_b_card4, train_cost_sts_b_card4) = tracking_kpis
+
+
+def parse_log(log):
+    '''
+    Yield (kpi_name, kpi_value) pairs found in a training log.
+
+    The log is split on newlines. A line is a KPI record when, after
+    stripping surrounding whitespace, it consists of exactly three
+    tab-separated fields and the first field is the literal "kpis":
+    "
+    kpis\ttrain_cost_sts_b_card1\t1.0
+    kpis\ttrain_duration_sts_b_card1\t12.5
+    "
+    The third field is converted with float(); every other line is ignored.
+    The split fields of every line (and each matching record again) are
+    echoed to stdout as a debugging aid.
+    '''
+    for raw_line in log.split('\n'):
+        fields = raw_line.strip().split('\t')
+        print(fields)
+        # Guard clause: anything that is not a 3-field "kpis" record is skipped.
+        if len(fields) != 3 or fields[0] != 'kpis':
+            continue
+        print("-----%s" % fields)
+        _, name, raw_value = fields
+        yield name, float(raw_value)
+
+
+def log_to_ce(log):
+    '''
+    Record every tracked KPI found in `log`.
+
+    Each (name, value) pair produced by parse_log() is printed; if the name
+    belongs to one of the KPIs in `tracking_kpis`, the value is added to that
+    KPI with add_record() and the KPI is persisted with persist().
+
+    A name that is not tracked (for example a run with a card count or task
+    name that has no KPI defined above) is reported and skipped instead of
+    raising KeyError, so the remaining records in the log are still stored.
+    '''
+    kpi_tracker = {kpi.name: kpi for kpi in tracking_kpis}
+
+    for kpi_name, kpi_value in parse_log(log):
+        print(kpi_name, kpi_value)
+        kpi = kpi_tracker.get(kpi_name)
+        if kpi is None:
+            # Untracked record: say so and keep processing the rest of the log.
+            print("skip untracked kpi: %s" % kpi_name)
+            continue
+        kpi.add_record(kpi_value)
+        kpi.persist()
+
+
+if __name__ == '__main__':
+    # Read the whole training log from stdin, echo it between two marker
+    # lines for debugging, then hand it to the KPI recorder.
+    log = sys.stdin.read()
+    for echoed in ("*****", log, "****"):
+        print(echoed)
+    log_to_ce(log)
--- a/PaddleNLP/PaddleLARK/XLNet/run_classifier.py
+++ b/PaddleNLP/PaddleLARK/XLNet/run_classifier.py
@@ -41,6 +41,7 @@ from model.classifier import create_model
 from optimization import optimization
 from utils.args import ArgumentGroup, print_arguments, check_cuda
 from utils.init import init_pretraining_params, init_checkpoint
+from utils.cards import get_cards

 num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))

@@ -432,20 +433,16 @@ def main(args):
        if args.enable_ce:
            card_num = get_cards()
            ce_cost = 0
-            ce_acc = 0
            ce_time = 0
            try:
                ce_cost = ce_info[-2][0]
-                ce_acc = ce_info[-2][1]
-                ce_time = ce_info[-2][2]
+                ce_time = ce_info[-2][1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_%s_card%s\t%s" %
-                (args.task_name, card_num, ce_time))
+                (args.task_name.replace("-", "_"), card_num, ce_time))
            print("kpis\ttrain_cost_%s_card%s\t%f" %
-                (args.task_name, card_num, ce_cost))
-            print("kpis\ttrain_acc_%s_card%s\t%f" %
-                (args.task_name, card_num, ce_acc))
+                (args.task_name.replace("-", "_"), card_num, ce_cost))


    # final eval on dev set

--- a/PaddleNLP/PaddleLARK/XLNet/utils/cards.py
+++ b/PaddleNLP/PaddleLARK/XLNet/utils/cards.py
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+
+def get_cards():
+    """
+    Return the number of GPU cards listed in CUDA_VISIBLE_DEVICES.
+
+    The variable is read as a comma-separated list. Only non-empty entries
+    are counted, so a trailing comma or stray whitespace ("0,1," or "0, 1")
+    does not inflate the result. Returns 0 when the variable is unset or
+    contains no entries.
+    """
+    cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
+    # Splitting "" yields [''], and "0,1," yields a trailing ''; dropping
+    # blank entries keeps the count equal to the number of listed devices.
+    return len([card for card in cards.split(',') if card.strip()])