From f82625d5a9846eb77a78220d4d7bb6b3146521af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=B4=BE=E6=99=93?=
Date: Tue, 28 May 2019 14:16:20 +0800
Subject: [PATCH] add label semantic roles ce (#718)

* add ce

* add ce

* add 02.recognize_digits ce files

* modify ce file code style of 01.fit_a_line and 02.recognize_digits

* add 04.word2vec ce

* add 05.recommender_system ce

* add label_semantic_roles ce
---
 05.recommender_system/.run_ce.sh   |  4 +++
 05.recommender_system/_ce.py       | 36 +++++++++++++++++++++++
 05.recommender_system/train.py     | 47 +++++++++++++++++++++++++-----
 07.label_semantic_roles/.run_ce.sh |  4 +++
 07.label_semantic_roles/_ce.py     | 37 +++++++++++++++++++++++
 07.label_semantic_roles/train.py   | 38 +++++++++++++++++++++---
 6 files changed, 154 insertions(+), 12 deletions(-)
 create mode 100755 05.recommender_system/.run_ce.sh
 create mode 100644 05.recommender_system/_ce.py
 create mode 100644 07.label_semantic_roles/.run_ce.sh
 create mode 100644 07.label_semantic_roles/_ce.py

diff --git a/05.recommender_system/.run_ce.sh b/05.recommender_system/.run_ce.sh
new file mode 100755
index 0000000..4c5ae21
--- /dev/null
+++ b/05.recommender_system/.run_ce.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+#This file is only used for continuous evaluation.
+python train.py --enable_ce | python _ce.py
+
diff --git a/05.recommender_system/_ce.py b/05.recommender_system/_ce.py
new file mode 100644
index 0000000..d46acd3
--- /dev/null
+++ b/05.recommender_system/_ce.py
@@ -0,0 +1,36 @@
+### This file is only used for continuous evaluation test!
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi
+
+test_cost_kpi = CostKpi('test_cost', 0.02, 0, actived=True, desc='test cost')
+tracking_kpis = [test_cost_kpi]
+
+
+def parse_log(log):
+    for line in log.split('\n'):
+        fs = line.strip().split('\t')
+        print(fs)
+        if len(fs) == 3 and fs[0] == 'kpis':
+            kpi_name = fs[1]
+            kpi_value = float(fs[2])
+            yield kpi_name, kpi_value
+
+
+def log_to_ce(log):
+    kpi_tracker = {}
+    for kpi in tracking_kpis:
+        kpi_tracker[kpi.name] = kpi
+    for (kpi_name, kpi_value) in parse_log(log):
+        print(kpi_name, kpi_value)
+        kpi_tracker[kpi_name].add_record(kpi_value)
+        kpi_tracker[kpi_name].persist()
+
+
+if __name__ == '__main__':
+    log = sys.stdin.read()
+    log_to_ce(log)
diff --git a/05.recommender_system/train.py b/05.recommender_system/train.py
index a4eafdb..70e7160 100644
--- a/05.recommender_system/train.py
+++ b/05.recommender_system/train.py
@@ -15,6 +15,7 @@ from __future__ import print_function
 
 import math
 import sys
+import argparse
 import numpy as np
 import paddle
 import paddle.fluid as fluid
@@ -22,9 +23,21 @@ import paddle.fluid.layers as layers
 import paddle.fluid.nets as nets
 
 IS_SPARSE = True
-USE_GPU = False
 BATCH_SIZE = 256
-PASS_NUM = 100
+
+
+def parse_args():
+    parser = argparse.ArgumentParser("recommender_system")
+    parser.add_argument(
+        '--enable_ce',
+        action='store_true',
+        help="If set, run the task with continuous evaluation logs.")
+    parser.add_argument(
+        '--use_gpu', type=int, default=0, help="Whether to use GPU or not.")
+    parser.add_argument(
+        '--num_epochs', type=int, default=1, help="number of epochs.")
+    args = parser.parse_args()
+    return args
 
 
 def get_usr_combined_features():
@@ -154,11 +167,18 @@ def optimizer_func():
 def train(use_cuda, params_dirname):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(paddle.dataset.movielens.train(), buf_size=8192),
-        batch_size=BATCH_SIZE)
-    test_reader = paddle.batch(
-        paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
+    if args.enable_ce:
+        train_reader = paddle.batch(
+            paddle.dataset.movielens.train(), batch_size=BATCH_SIZE)
+        test_reader = paddle.batch(
+            paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
+    else:
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.movielens.train(), buf_size=8192),
+            batch_size=BATCH_SIZE)
+        test_reader = paddle.batch(
+            paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
 
     feed_order = [
         'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id',
@@ -167,6 +187,10 @@ def train(use_cuda, params_dirname):
     main_program = fluid.default_main_program()
     star_program = fluid.default_startup_program()
 
+    if args.enable_ce:
+        main_program.random_seed = 90
+        star_program.random_seed = 90
+
     scale_infer, avg_cost = inference_program()
 
     test_program = main_program.clone(for_test=True)
@@ -212,6 +236,10 @@ def train(use_cuda, params_dirname):
 
                 # if test_avg_cost < 4.0: # Change this number to adjust accuracy
                 if batch_id == 20:
+
+                    if args.enable_ce:
+                        print("kpis\ttest_cost\t%f" % float(test_avg_cost))
+
                     if params_dirname is not None:
                         fluid.io.save_inference_model(params_dirname, [
                             "user_id", "gender_id", "age_id", "job_id",
@@ -319,4 +347,7 @@ def main(use_cuda):
 
 
 if __name__ == '__main__':
-    main(USE_GPU)
+    args = parse_args()
+    PASS_NUM = args.num_epochs
+    use_cuda = args.use_gpu
+    main(use_cuda)
diff --git a/07.label_semantic_roles/.run_ce.sh b/07.label_semantic_roles/.run_ce.sh
new file mode 100644
index 0000000..4c5ae21
--- /dev/null
+++ b/07.label_semantic_roles/.run_ce.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+#This file is only used for continuous evaluation.
+python train.py --enable_ce | python _ce.py
+
diff --git a/07.label_semantic_roles/_ce.py b/07.label_semantic_roles/_ce.py
new file mode 100644
index 0000000..11f2f76
--- /dev/null
+++ b/07.label_semantic_roles/_ce.py
@@ -0,0 +1,37 @@
+### This file is only used for continuous evaluation test!
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi
+
+train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True, desc='train cost')
+tracking_kpis = [train_cost_kpi]
+
+
+def parse_log(log):
+    for line in log.split('\n'):
+        fs = line.strip().split('\t')
+        print(fs)
+        if len(fs) == 3 and fs[0] == 'kpis':
+            kpi_name = fs[1]
+            kpi_value = float(fs[2])
+            yield kpi_name, kpi_value
+
+
+def log_to_ce(log):
+    kpi_tracker = {}
+    for kpi in tracking_kpis:
+        kpi_tracker[kpi.name] = kpi
+
+    for (kpi_name, kpi_value) in parse_log(log):
+        print(kpi_name, kpi_value)
+        kpi_tracker[kpi_name].add_record(kpi_value)
+        kpi_tracker[kpi_name].persist()
+
+
+if __name__ == '__main__':
+    log = sys.stdin.read()
+    log_to_ce(log)
diff --git a/07.label_semantic_roles/train.py b/07.label_semantic_roles/train.py
index 2952515..12af7f7 100644
--- a/07.label_semantic_roles/train.py
+++ b/07.label_semantic_roles/train.py
@@ -7,6 +7,7 @@ import paddle.dataset.conll05 as conll05
 import paddle.fluid as fluid
 import six
 import time
+import argparse
 
 with_gpu = os.getenv('WITH_GPU', '0') != '0'
 
@@ -29,6 +30,20 @@ BATCH_SIZE = 10
 embedding_name = 'emb'
 
 
+def parse_args():
+    parser = argparse.ArgumentParser("label_semantic_roles")
+    parser.add_argument(
+        '--enable_ce',
+        action='store_true',
+        help="If set, run the task with continuous evaluation logs.")
+    parser.add_argument(
+        '--use_gpu', type=int, default=0, help="Whether to use GPU or not.")
+    parser.add_argument(
+        '--num_epochs', type=int, default=100, help="number of epochs.")
+    args = parser.parse_args()
+    return args
+
+
 def load_parameter(file_name, h, w):
     with open(file_name, 'rb') as f:
         f.read(16)  # skip header.
@@ -122,6 +137,10 @@ def train(use_cuda, save_dirname=None, is_local=True):
     mark = fluid.layers.data(
         name='mark_data', shape=[1], dtype='int64', lod_level=1)
 
+    if args.enable_ce:
+        fluid.default_startup_program().random_seed = 90
+        fluid.default_main_program().random_seed = 90
+
     # define network topology
     feature_out = db_lstm(**locals())
     target = fluid.layers.data(
@@ -145,9 +164,13 @@ def train(use_cuda, save_dirname=None, is_local=True):
     crf_decode = fluid.layers.crf_decoding(
         input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
 
-    train_data = paddle.batch(
-        paddle.reader.shuffle(paddle.dataset.conll05.test(), buf_size=8192),
-        batch_size=BATCH_SIZE)
+    if args.enable_ce:
+        train_data = paddle.batch(
+            paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
+    else:
+        train_data = paddle.batch(
+            paddle.reader.shuffle(paddle.dataset.conll05.test(), buf_size=8192),
+            batch_size=BATCH_SIZE)
 
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 
@@ -181,6 +204,9 @@ def train(use_cuda, save_dirname=None, is_local=True):
                             time.time() - start_time) / batch_id))
                 # Set the threshold low to speed up the CI test
                 if float(cost) < 60.0:
+                    if args.enable_ce:
+                        print("kpis\ttrain_cost\t%f" % cost)
+
                     if save_dirname is not None:
                         # TODO(liuyiqun): Change the target to crf_decode
                         fluid.io.save_inference_model(save_dirname, [
@@ -282,4 +308,8 @@ def main(use_cuda, is_local=True):
     infer(use_cuda, save_dirname)
 
 
-main(use_cuda=False)
+if __name__ == '__main__':
+    args = parse_args()
+    use_cuda = args.use_gpu
+    PASS_NUM = args.num_epochs
+    main(use_cuda)
--
GitLab
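
A minimal, standalone sketch of the log protocol the two _ce.py scripts added above consume. This snippet is not part of the patch, and the cost value in it is invented purely for illustration. With --enable_ce set, train.py prints tab-separated lines of the form kpis<TAB><kpi name><TAB><value> to stdout; .run_ce.sh pipes that stream into _ce.py, whose parse_log keeps only those lines. The version below mirrors that filtering logic but leaves out the kpi-module bookkeeping (CostKpi, add_record, persist), which needs the ceroot environment:

# Illustrative only: the 55.3 cost value is made up, not taken from a real run.
sample_log = "\n".join([
    "avg_cost: 55.3",               # ordinary training output, ignored by parse_log
    "kpis\ttrain_cost\t55.300000",  # CE line: 'kpis' <TAB> kpi name <TAB> value
])


def parse_log(log):
    # Same filtering as the parse_log added in _ce.py above, without the kpi tracker.
    for line in log.split('\n'):
        fs = line.strip().split('\t')
        if len(fs) == 3 and fs[0] == 'kpis':
            yield fs[1], float(fs[2])


print(list(parse_log(sample_log)))  # -> [('train_cost', 55.3)]

Running the snippet prints [('train_cost', 55.3)], the (name, value) pair that log_to_ce would hand to the matching CostKpi tracker.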