diff --git a/fluid/sequence_tagging_for_ner/train.py b/fluid/sequence_tagging_for_ner/train.py
index 0e05f6550f1b701748c9a2c34224b4a16dea7e8b..64e6dfe22d3a40eaedb65e8595f908b85117ee8f 100644
--- a/fluid/sequence_tagging_for_ner/train.py
+++ b/fluid/sequence_tagging_for_ner/train.py
@@ -60,7 +60,7 @@ def main(train_data_file,
         chunk_scheme="IOB",
         num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
 
-    inference_program = fluid.default_main_program().clone()
+    inference_program = fluid.default_main_program().clone(for_test=True)
     with fluid.program_guard(inference_program):
         test_target = chunk_evaluator.metrics + chunk_evaluator.states
         inference_program = fluid.io.get_inference_program(test_target)
diff --git a/fluid/text_classification/.run_ce.sh b/fluid/text_classification/.run_ce.sh
new file mode 100755
index 0000000000000000000000000000000000000000..777ba02e8c78760452a6fb7d4ac1dc4a82d62594
--- /dev/null
+++ b/fluid/text_classification/.run_ce.sh
@@ -0,0 +1,5 @@
+###!/bin/bash
+####This file is only used for continuous evaluation.
+
+export CE_MODE_X=1
+python train.py cnn | python _ce.py
diff --git a/fluid/text_classification/_ce.py b/fluid/text_classification/_ce.py
new file mode 100644
index 0000000000000000000000000000000000000000..100357204db7f3a8d0c1d3cbcbdc707410b20023
--- /dev/null
+++ b/fluid/text_classification/_ce.py
@@ -0,0 +1,48 @@
+####this file is only used for continuous evaluation test!
+
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi, DurationKpi, AccKpi
+
+#### NOTE kpi.py should shared in models in some way!!!!
+
+train_acc_kpi = AccKpi('train_acc', 0.005, actived=True)
+train_cost_kpi = CostKpi('train_cost', 0.005, actived=True)
+train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)
+
+tracking_kpis = [
+    train_acc_kpi,
+    train_cost_kpi,
+    train_duration_kpi,
+]
+
+
+def parse_log(log):
+    for line in log.split('\n'):
+        fs = line.strip().split('\t')
+        print(fs)
+        if len(fs) == 3 and fs[0] == 'kpis':
+            print("-----%s" % fs)
+            kpi_name = fs[1]
+            kpi_value = float(fs[2])
+            yield kpi_name, kpi_value
+
+
+def log_to_ce(log):
+    kpi_tracker = {}
+    for kpi in tracking_kpis:
+        kpi_tracker[kpi.name] = kpi
+
+    for (kpi_name, kpi_value) in parse_log(log):
+        print(kpi_name, kpi_value)
+        kpi_tracker[kpi_name].add_record(kpi_value)
+        kpi_tracker[kpi_name].persist()
+
+
+if __name__ == '__main__':
+    log = sys.stdin.read()
+    print("*****")
+    print(log)
+    print("****")
+    log_to_ce(log)
diff --git a/fluid/text_classification/train.py b/fluid/text_classification/train.py
index dc164671e785b758365885b98788fae71d5f8a87..698e4dc0788f2e185810a4f782ac4dcff1f60c81 100644
--- a/fluid/text_classification/train.py
+++ b/fluid/text_classification/train.py
@@ -1,3 +1,4 @@
+import os
 import sys
 import time
 import unittest
@@ -53,8 +54,12 @@ def train(train_reader,
     exe = fluid.Executor(place)
     feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
 
+    # For internal continuous evaluation
+    if 'CE_MODE_X' in os.environ:
+        fluid.default_startup_program().random_seed = 110
     exe.run(fluid.default_startup_program())
     for pass_id in xrange(pass_num):
+        pass_start = time.time()
         data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
         for data in train_reader():
             avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
@@ -73,6 +78,13 @@ def train(train_reader,
         epoch_model = save_dirname + "/" + "epoch" + str(pass_id)
         fluid.io.save_inference_model(epoch_model, ["words", "label"], acc, exe)
+        pass_end = time.time()
+        # For internal continuous evaluation
+        if 'CE_MODE_X' in os.environ:
+            print("kpis\ttrain_acc\t%f" % avg_acc)
+            print("kpis\ttrain_cost\t%f" % avg_cost)
+            print("kpis\ttrain_duration\t%f" % (pass_end - pass_start))
+
 
 
 def train_net():
     word_dict, train_reader, test_reader = utils.prepare_data(
diff --git a/fluid/text_classification/utils.py b/fluid/text_classification/utils.py
index fba14dde63d27ada07d8fcd69cacfb631559e613..874679c3e2f9fe0c640d6da4f25d503023adcb65 100644
--- a/fluid/text_classification/utils.py
+++ b/fluid/text_classification/utils.py
@@ -1,3 +1,4 @@
+import os
 import sys
 import time
 import numpy as np
@@ -64,15 +65,22 @@ def prepare_data(data_type="imdb",
         raise RuntimeError("No such dataset")
 
     if data_type == "imdb":
-        train_reader = paddle.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.imdb.train(word_dict), buf_size=buf_size),
-            batch_size=batch_size)
+        if 'CE_MODE_X' in os.environ:
+            train_reader = paddle.batch(
+                paddle.dataset.imdb.train(word_dict), batch_size=batch_size)
 
-        test_reader = paddle.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.imdb.test(word_dict), buf_size=buf_size),
-            batch_size=batch_size)
+            test_reader = paddle.batch(
+                paddle.dataset.imdb.test(word_dict), batch_size=batch_size)
+        else:
+            train_reader = paddle.batch(
+                paddle.reader.shuffle(
+                    paddle.dataset.imdb.train(word_dict), buf_size=buf_size),
+                batch_size=batch_size)
+
+            test_reader = paddle.batch(
+                paddle.reader.shuffle(
+                    paddle.dataset.imdb.test(word_dict), buf_size=buf_size),
+                batch_size=batch_size)
     else:
         raise RuntimeError("no such dataset")
 