From e0c1fb3a82231a8e3e888973e0b3c00be56b344e Mon Sep 17 00:00:00 2001
From: xuezhong
Date: Fri, 2 Nov 2018 06:40:05 +0000
Subject: [PATCH] default setting

---
 .../machine_reading_comprehension/.run_ce.sh  | 45 ++++++++++++
 .../machine_reading_comprehension/_ce.py      | 68 +++++++++++++++++++
 2 files changed, 113 insertions(+)
 create mode 100644 fluid/PaddleNLP/machine_reading_comprehension/.run_ce.sh
 create mode 100644 fluid/PaddleNLP/machine_reading_comprehension/_ce.py

diff --git a/fluid/PaddleNLP/machine_reading_comprehension/.run_ce.sh b/fluid/PaddleNLP/machine_reading_comprehension/.run_ce.sh
new file mode 100644
index 00000000..715d5bb3
--- /dev/null
+++ b/fluid/PaddleNLP/machine_reading_comprehension/.run_ce.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+DATA_PATH=./data
+if [ ! -e $DATA_PATH/demo ] ; then
+    mkdir -p $DATA_PATH/demo
+    if [ ! -e $DATA_PATH/demo.tgz ] ; then
+        cd $DATA_PATH
+        wget -c --no-check-certificate http://dureader.gz.bcebos.com/demo.tgz
+        cd -
+    fi
+    tar -zxf $DATA_PATH/demo.tgz -C $DATA_PATH/demo
+fi
+
+train(){
+python -u run.py \
+    --trainset 'data/demo/search.train.json' \
+    --devset 'data/demo/search.dev.json' \
+    --testset 'data/demo/search.test.json' \
+    --vocab_dir 'data/demo/' \
+    --use_gpu true \
+    --save_dir ./models \
+    --pass_num 1 \
+    --learning_rate 0.001 \
+    --batch_size 32 \
+    --embed_size 300 \
+    --hidden_size 150 \
+    --max_p_num 5 \
+    --max_p_len 500 \
+    --max_q_len 60 \
+    --max_a_len 200 \
+    --drop_rate 0.2 \
+    --log_interval 1 \
+    --enable_ce \
+    --train
+}
+
+cudaid=${transformer:=0} # use 0-th card as default
+export CUDA_VISIBLE_DEVICES=$cudaid
+
+train | python _ce.py
+
+cudaid=${transformer_m:=0,1} # use 0,1 card as default
+export CUDA_VISIBLE_DEVICES=$cudaid
+
+train | python _ce.py
diff --git a/fluid/PaddleNLP/machine_reading_comprehension/_ce.py b/fluid/PaddleNLP/machine_reading_comprehension/_ce.py
new file mode 100644
index 00000000..24243846
--- /dev/null
+++ b/fluid/PaddleNLP/machine_reading_comprehension/_ce.py
@@ -0,0 +1,68 @@
+####this file is only used for continuous evaluation test!
+
+import os
+import sys
+#sys.path.insert(0, os.environ['ceroot'])
+from kpi import CostKpi, DurationKpi, AccKpi
+
+#### NOTE kpi.py should shared in models in some way!!!!
+
+train_cost_card1_kpi = CostKpi('train_cost_card1', 0.02, 0, actived=True)
+test_cost_card1_kpi = CostKpi('test_cost_card1', 0.005, 0, actived=True)
+train_duration_card1_kpi = DurationKpi(
+    'train_duration_card1', 0.06, 0, actived=True)
+train_cost_card4_kpi = CostKpi('train_cost_card2', 0.01, 0, actived=True)
+test_cost_card4_kpi = CostKpi('test_cost_card2', 0.005, 0, actived=True)
+train_duration_card4_kpi = DurationKpi(
+    'train_duration_card2', 0.06, 0, actived=True)
+
+tracking_kpis = [
+    train_cost_card1_kpi,
+    test_cost_card1_kpi,
+    train_duration_card1_kpi,
+    train_cost_card4_kpi,
+    test_cost_card4_kpi,
+    train_duration_card4_kpi,
+]
+
+
+def parse_log(log):
+    '''
+    This method should be implemented by model developers.
+    The suggestion:
+    each line in the log should be key, value, for example:
+    "
+    train_cost\t1.0
+    test_cost\t1.0
+    train_cost\t1.0
+    train_cost\t1.0
+    train_acc\t1.2
+    "
+    '''
+    for line in log.split('\n'):
+        fs = line.strip().split('\t')
+        print(fs)
+        if len(fs) == 3 and fs[0] == 'kpis':
+            print("-----%s" % fs)
+            kpi_name = fs[1]
+            kpi_value = float(fs[2])
+            yield kpi_name, kpi_value
+
+
+def log_to_ce(log):
+    kpi_tracker = {}
+    for kpi in tracking_kpis:
+        kpi_tracker[kpi.name] = kpi
+
+    for (kpi_name, kpi_value) in parse_log(log):
+        print(kpi_name, kpi_value)
+        kpi_tracker[kpi_name].add_record(kpi_value)
+        kpi_tracker[kpi_name].persist()
+
+
+if __name__ == '__main__':
+    log = sys.stdin.read()
+    print("*****")
+    print(log)
+    print("****")
+    log_to_ce(log)
--
GitLab