#!/bin/sh
# Continuous-evaluation (CE) driver for auto_dialogue_evaluation.
# Runs the same short training job twice -- first on one GPU, then on four --
# and pipes each training log into _ce.py, which reads it from stdin.

export CE_MODE_X=ce
export FLAGS_eager_delete_tensor_gb=0.0

# Launch one training run on whatever devices CUDA_VISIBLE_DEVICES currently
# lists and feed its stdout to the KPI collector.
run_ce_training() {
    python -u main.py \
        --do_train True \
        --use_cuda \
        --save_path model_files_tmp/matching_pretrained \
        --train_path data/unlabel_data/train.ids \
        --val_path data/unlabel_data/val.ids \
        --print_step 3 \
        --num_scan_data 3 | python _ce.py
}

# Single-card run.
export CUDA_VISIBLE_DEVICES=0
run_ce_training

# Four-card run.
export CUDA_VISIBLE_DEVICES=0,1,2,3
run_ce_training
# KPI collector for the continuous-evaluation (CE) test run.
# Reads a training log from stdin, extracts the "kpis" records and hands them
# to the KPI objects declared below.

import os
import sys
# The `kpi` module is looked up under the directory named by $ceroot, so the
# path must be extended before the imports below.
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi

# NOTE(review): the meaning of the numeric arguments (0.03 / 0.01 and 0) is
# defined by the external `kpi` module -- confirm there before changing them.
train_loss_card1 = CostKpi('train_loss_card1', 0.03, 0, actived=True)
train_loss_card4 = CostKpi('train_loss_card4', 0.03, 0, actived=True)
train_duration_card1 = DurationKpi('train_duration_card1', 0.01, 0, actived=True)
train_duration_card4 = DurationKpi('train_duration_card4', 0.01, 0, actived=True)

# Every KPI that log_to_ce() is able to record.
tracking_kpis = [
    train_loss_card1,
    train_loss_card4,
    train_duration_card1,
    train_duration_card4,
]


def parse_log(log):
    '''
    Extract KPI records from a training log.

    A record is a line made of exactly three tab-separated fields whose
    first field is the literal tag "kpis", for example:

        kpis<TAB>train_loss_card1<TAB>0.25

    Every line of the log (record or not) is printed as its list of fields.

    Args:
        log: the complete log text.

    Yields:
        (kpi_name, kpi_value) pairs, with kpi_value converted to float.

    Raises:
        ValueError: if the third field of a record is not a valid float.
    '''
    for raw_line in log.split('\n'):
        fields = raw_line.strip().split('\t')
        print(fields)
        # Anything that is not a well-formed "kpis" record is ignored.
        if len(fields) != 3 or fields[0] != 'kpis':
            continue
        _, name, raw_value = fields
        yield name, float(raw_value)


def log_to_ce(log):
    '''
    Record and persist every KPI value found in `log`.

    Args:
        log: the complete log text, in the format parse_log() understands.

    Raises:
        KeyError: if the log reports a KPI name that is not in tracking_kpis.
    '''
    kpi_by_name = {kpi.name: kpi for kpi in tracking_kpis}

    for (kpi_name, kpi_value) in parse_log(log):
        print(kpi_name, kpi_value)
        tracked = kpi_by_name[kpi_name]
        tracked.add_record(kpi_value)
        tracked.persist()


if __name__ == '__main__':
    # The training log arrives on stdin.
    log_to_ce(sys.stdin.read())
def get_cards():
    """Return the number of device ids listed in ``CUDA_VISIBLE_DEVICES``.

    The variable is read as a comma-separated list. Blank entries, such as
    those produced by a trailing or doubled comma or by a whitespace-only
    value, are not counted.

    Returns:
        int: the number of non-blank entries, or 0 when the variable is
        unset or empty.
    """
    cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    # Filter out blank fragments so that e.g. "0,1," is reported as 2 cards
    # rather than 3, and "" / " " as 0.
    return len([card for card in cards.split(',') if card.strip()])