From 99f5b2d6b5fa3381176002af868d8ce9942ddfb6 Mon Sep 17 00:00:00 2001
From: zhengya01 <43601548+zhengya01@users.noreply.github.com>
Date: Tue, 14 May 2019 15:45:06 +0800
Subject: [PATCH] Ce language model (#2193)

* add ce for language_model

* update ce

* update ce
---
Review notes (free-form text here is ignored by `git am` / `git apply`):

- train.py now embeds the RNN variant (args.rnn_model, i.e. the value passed
  via --rnn_model in .run_ce.sh) in the emitted KPI names. _ce.py only
  registers lstm_language_model_{static,padding}_{loss,duration}_card1, so
  the previous names (lstm_language_model_{loss,duration}_cardN) made
  log_to_ce() fail with KeyError on the first "ptblm" record.
  NOTE(review): assumes the argument parser exposes --rnn_model as
  args.rnn_model -- confirm against the parser definition.
- get_cards() returns 0 when CUDA_VISIBLE_DEVICES is unset or empty, which
  yields "..._card0" names that _ce.py does not track; .run_ce.sh exports
  CUDA_VISIBLE_DEVICES=0, so the CE run reports card1.
- Leading whitespace of the train.py context lines was reconstructed; if
  the hunks do not apply cleanly, use `git apply --ignore-whitespace`.
  The post-image blob id on the train.py index line predates the KPI-name
  change above.

 PaddleNLP/language_model/.run_ce.sh  | 15 +++++++
 PaddleNLP/language_model/__init__.py |  0
 PaddleNLP/language_model/_ce.py      | 62 ++++++++++++++++++++++++++++
 PaddleNLP/language_model/train.py    | 15 +++++--
 4 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 PaddleNLP/language_model/.run_ce.sh
 create mode 100644 PaddleNLP/language_model/__init__.py
 create mode 100644 PaddleNLP/language_model/_ce.py

diff --git a/PaddleNLP/language_model/.run_ce.sh b/PaddleNLP/language_model/.run_ce.sh
new file mode 100644
index 00000000..96c7e71d
--- /dev/null
+++ b/PaddleNLP/language_model/.run_ce.sh
@@ -0,0 +1,15 @@
+export CUDA_VISIBLE_DEVICES=0
+
+python train.py \
+    --data_path data/simple-examples/data/ \
+    --model_type test \
+    --use_gpu True \
+    --rnn_model static \
+    --enable_ce | python _ce.py
+
+python train.py \
+    --data_path data/simple-examples/data/ \
+    --model_type test \
+    --use_gpu True \
+    --rnn_model padding \
+    --enable_ce | python _ce.py
diff --git a/PaddleNLP/language_model/__init__.py b/PaddleNLP/language_model/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/PaddleNLP/language_model/_ce.py b/PaddleNLP/language_model/_ce.py
new file mode 100644
index 00000000..2a2cadae
--- /dev/null
+++ b/PaddleNLP/language_model/_ce.py
@@ -0,0 +1,62 @@
+# this file is only used for continuous evaluation test!
+
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi
+from kpi import DurationKpi
+
+imikolov_20_avg_ppl_kpi_card1 = CostKpi('lstm_language_model_static_loss_card1', 0.01, 0)
+imikolov_20_pass_duration_kpi_card1 = DurationKpi(
+    'lstm_language_model_static_duration_card1', 0.03, 0, actived=True)
+imikolov_20_avg_ppl_kpi_card1_padding = CostKpi('lstm_language_model_padding_loss_card1', 0.01, 0)
+imikolov_20_pass_duration_kpi_card1_padding = DurationKpi(
+    'lstm_language_model_padding_duration_card1', 0.03, 0, actived=True)
+
+tracking_kpis = [
+    imikolov_20_avg_ppl_kpi_card1,
+    imikolov_20_pass_duration_kpi_card1,
+    imikolov_20_avg_ppl_kpi_card1_padding,
+    imikolov_20_pass_duration_kpi_card1_padding,
+]
+
+
+def parse_log(log):
+    '''
+    This method should be implemented by model developers.
+
+    The suggestion:
+
+    each line in the log should be key, value, for example:
+
+    "
+    train_cost\t1.0
+    test_cost\t1.0
+    train_cost\t1.0
+    train_cost\t1.0
+    train_acc\t1.2
+    "
+    '''
+    for line in log.split('\n'):
+        fs = line.strip().split('\t')
+        print(fs)
+        if len(fs) == 3 and fs[0] == 'ptblm':
+            kpi_name = fs[1]
+            kpi_value = float(fs[2])
+            yield kpi_name, kpi_value
+
+
+def log_to_ce(log):
+    kpi_tracker = {}
+    for kpi in tracking_kpis:
+        kpi_tracker[kpi.name] = kpi
+
+    for (kpi_name, kpi_value) in parse_log(log):
+        print(kpi_name, kpi_value)
+        kpi_tracker[kpi_name].add_record(kpi_value)
+        kpi_tracker[kpi_name].persist()
+
+
+if __name__ == '__main__':
+    log = sys.stdin.read()
+    log_to_ce(log)
diff --git a/PaddleNLP/language_model/train.py b/PaddleNLP/language_model/train.py
index 115b803e..e399e998 100644
--- a/PaddleNLP/language_model/train.py
+++ b/PaddleNLP/language_model/train.py
@@ -286,9 +286,10 @@ def train():
         print("train ppl", ppl[0])
 
         if epoch_id == max_epoch - 1 and args.enable_ce:
-            print("ptblm\tlstm_language_model_duration\t%s" %
-                  (total_time / max_epoch))
-            print("ptblm\tlstm_language_model_loss\t%s" % ppl[0])
+            card_num = get_cards()  # KPI names below must match those registered in _ce.py
+            print("ptblm\tlstm_language_model_%s_duration_card%d\t%s" %
+                  (args.rnn_model, card_num, total_time / max_epoch))
+            print("ptblm\tlstm_language_model_%s_loss_card%d\t%s" % (args.rnn_model, card_num, ppl[0]))
 
         model_path = os.path.join("model_new/", str(epoch_id))
         if not os.path.isdir(model_path):
@@ -301,5 +302,13 @@ def train():
     print("test ppl", test_ppl[0])
 
 
+def get_cards():
+    num = 0
+    cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
+    if cards != '':
+        num = len(cards.split(","))
+    return num
+
+
 if __name__ == '__main__':
     train()
-- 
GitLab