diff --git a/fluid/icnet/.run_ce.sh b/fluid/icnet/.run_ce.sh new file mode 100755 index 0000000000000000000000000000000000000000..a46081c7978395697b843c5fef95e6091b47e4e5 --- /dev/null +++ b/fluid/icnet/.run_ce.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# This file is only used for continuous evaluation. + +rm -rf *_factor.txt +python train.py --use_gpu=True 1> log +cat log | python _ce.py diff --git a/fluid/icnet/_ce.py b/fluid/icnet/_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..3844eefde620f9587d747594ad0d5351999859c8 --- /dev/null +++ b/fluid/icnet/_ce.py @@ -0,0 +1,57 @@ +# This file is only used for the continuous evaluation test! + +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi, DurationKpi, AccKpi + +# NOTE: kpi.py should be shared across models in some way! + +train_cost_kpi = CostKpi('train_cost', 0.02, actived=True) +train_duration_kpi = DurationKpi('train_duration', 0.06, actived=True) + +tracking_kpis = [ + train_cost_kpi, + train_duration_kpi, ] + + +def parse_log(log): + ''' + This method should be implemented by model developers. + + The suggestion: + + each KPI line in the log should be 'kpis\t<kpi_name>\t<kpi_value>', for example: + + " + kpis\ttrain_cost\t1.0 + kpis\ttest_cost\t1.0 + kpis\ttrain_cost\t1.0 + kpis\ttrain_cost\t1.0 + kpis\ttrain_acc\t1.2 + " + ''' + for line in log.split('\n'): + fs = line.strip().split('\t') + print(fs) + if len(fs) == 3 and fs[0] == 'kpis': + kpi_name = fs[1] + kpi_value = float(fs[2]) + yield kpi_name, kpi_value + + +def log_to_ce(log): + kpi_tracker = {} + for kpi in tracking_kpis: + kpi_tracker[kpi.name] = kpi + + for (kpi_name, kpi_value) in parse_log(log): + print(kpi_name, kpi_value) + kpi_tracker[kpi_name].add_record(kpi_value) + kpi_tracker[kpi_name].persist() + + +if __name__ == '__main__': + log = sys.stdin.read() + log_to_ce(log) diff --git a/fluid/icnet/eval.py b/fluid/icnet/eval.py index d3253c3cb63b8bb58d8a1bdad3318de1c1441142..bdebe7ad72d799e709bd529711d600a9d692a838 100644 --- a/fluid/icnet/eval.py +++ b/fluid/icnet/eval.py @@ -20,12 +20,12 @@ add_arg('use_gpu', bool, True, "Whether use GPU to test.") def cal_mean_iou(wrong, correct): - sum = wrong + cerroct + sum = wrong + correct true_num = (sum != 0).sum() - for i in len(sum): + for i in range(len(sum)): if sum[i] == 0: sum[i] = 1 - return (cerroct.astype("float64") / sum).sum() / true_num + return (correct.astype("float64") / sum).sum() / true_num def create_iou(predict, label, mask, num_classes, image_shape): @@ -84,6 +84,7 @@ def eval(args): sys.stdout.flush() iou = cal_mean_iou(out_wrong, out_right) print "\nmean iou: %.3f" % iou + print "kpis test_acc %f" % iou def main(): diff --git a/fluid/icnet/icnet.py b/fluid/icnet/icnet.py index 14eaa5fa25c8570cc8747842333c7ca72f104fd1..afe3fa9d352bd8fbf6b2fad46f24ad4c9841a1ff 100644 --- a/fluid/icnet/icnet.py +++ b/fluid/icnet/icnet.py @@ -184,7 +184,7 @@ def res_block(input, filter_num, padding=0, dilation=None, name=None): tmp = conv(tmp, 1, 1, filter_num, 1, 1, name=name + "_1_1_increase") tmp = bn(tmp, relu=False) tmp = input + tmp - tmp = fluid.layers.relu(tmp, name=name + "_relu") + tmp = fluid.layers.relu(tmp) return tmp @@ -227,7 +227,7 @@ def proj_block(input, filter_num, padding=0, dilation=None, stride=1, tmp = conv(tmp, 1, 1, filter_num, 1, 1, name=name + "_1_1_increase") tmp = bn(tmp, relu=False) tmp = proj_bn + tmp - tmp = fluid.layers.relu(tmp, name=name + "_relu") + tmp = fluid.layers.relu(tmp) return tmp diff --git a/fluid/icnet/train.py b/fluid/icnet/train.py index
298a2113a15614641d573551e67006f9abbe751a..b38f08258b9b3e1bd28d808b2779416259f9d827 100644 --- a/fluid/icnet/train.py +++ b/fluid/icnet/train.py @@ -11,6 +11,10 @@ from utils import add_arguments, print_arguments, get_feeder_data from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter from paddle.fluid.initializer import init_on_cpu +SEED = 90 +# The random seed must be set before configuring the network. +fluid.default_startup_program().random_seed = SEED + parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable @@ -27,9 +31,9 @@ LAMBDA2 = 0.4 LAMBDA3 = 1.0 LEARNING_RATE = 0.003 POWER = 0.9 -LOG_PERIOD = 1 -CHECKPOINT_PERIOD = 1000 -TOTAL_STEP = 60000 +LOG_PERIOD = 100 +CHECKPOINT_PERIOD = 100 +TOTAL_STEP = 100 no_grad_set = [] @@ -97,10 +101,13 @@ def train(args): sub124_loss = 0. train_reader = cityscape.train( args.batch_size, flip=args.random_mirror, scaling=args.random_scaling) + start_time = time.time() while True: # train a pass for data in train_reader(): if iter_id > TOTAL_STEP: + end_time = time.time() + print "kpis train_duration %f" % (end_time - start_time) return iter_id += 1 results = exe.run( @@ -115,13 +122,15 @@ def train(args): print "Iter[%d]; train loss: %.3f; sub4_loss: %.3f; sub24_loss: %.3f; sub124_loss: %.3f" % ( iter_id, t_loss / LOG_PERIOD, sub4_loss / LOG_PERIOD, sub24_loss / LOG_PERIOD, sub124_loss / LOG_PERIOD) + print "kpis train_cost %f" % (t_loss / LOG_PERIOD) + t_loss = 0. sub4_loss = 0. sub24_loss = 0. sub124_loss = 0. sys.stdout.flush() - if iter_id % CHECKPOINT_PERIOD == 0: + if iter_id % CHECKPOINT_PERIOD == 0 and args.checkpoint_path is not None: dir_name = args.checkpoint_path + "/" + str(iter_id) fluid.io.save_persistables(exe, dirname=dir_name) print "Saved checkpoint: %s" % (dir_name) diff --git a/fluid/image_classification/.gitignore b/fluid/image_classification/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..3048ccf6383cceed690606fdcd6a8880f1d029ab --- /dev/null +++ b/fluid/image_classification/.gitignore @@ -0,0 +1,9 @@ +# saved model +output/ + +# ImageNet (ILSVRC2012) data +data/ILSVRC2012/ILSVRC2012_img_val.tar +data/ILSVRC2012/ILSVRC2012_img_train.tar +data/ILSVRC2012/ImageNet_label.tgz +data/ILSVRC2012/train_list.txt +data/ILSVRC2012/val_list.txt diff --git a/fluid/image_classification/.run_ce.sh b/fluid/image_classification/.run_ce.sh new file mode 100755 index 0000000000000000000000000000000000000000..e54fcc8b61d4e01befe7fa845a3bc7a5e315dc94 --- /dev/null +++ b/fluid/image_classification/.run_ce.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# This file is only used for continuous evaluation. +cudaid=${object_detection_cudaid:=0} +export CUDA_VISIBLE_DEVICES=$cudaid +python train.py --batch_size=64 --num_passes=10 --total_images=6149 --enable_ce=True | python _ce.py + +cudaid=${object_detection_cudaid:=0,1,2,3} +export CUDA_VISIBLE_DEVICES=$cudaid +python train.py --batch_size=64 --num_passes=10 --total_images=6149 --enable_ce=True | python _ce.py diff --git a/fluid/image_classification/_ce.py b/fluid/image_classification/_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..91d7ca9394fc2c638573265d159ed1c49755eb3d --- /dev/null +++ b/fluid/image_classification/_ce.py @@ -0,0 +1,94 @@ +#### This file is only used for the continuous evaluation test!
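+#
+# How this script is driven (inferred from parse_log() below): train.py emits
+# tab-separated KPI records on stdout in the form
+# 'kpis\t<kpi_name>\t<kpi_value>', for example:
+#
+#   kpis\ttrain_acc_top1\t0.9
+#   kpis\ttrain_cost\t1.2
+#
+# .run_ce.sh pipes that stream into this script, which records and persists
+# every KPI named in tracking_kpis.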
+ +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi, DurationKpi, AccKpi + +#### NOTE: kpi.py should be shared across models in some way! + +train_acc_top1_kpi = AccKpi('train_acc_top1', 0.05, 0, desc='TOP1 ACC') +train_acc_top5_kpi = AccKpi( + 'train_acc_top5', 0.05, 0, actived=False, desc='TOP5 ACC') +train_cost_kpi = CostKpi('train_cost', 0.3, 0, actived=True, desc='train cost') +test_acc_top1_kpi = AccKpi('test_acc_top1', 0.05, 0, desc='TOP1 ACC') +test_acc_top5_kpi = AccKpi( + 'test_acc_top5', 0.05, 0, actived=False, desc='TOP5 ACC') +test_cost_kpi = CostKpi('test_cost', 1.0, 0, actived=True, desc='test cost') +train_speed_kpi = AccKpi( + 'train_speed', + 0.05, + 0, + actived=True, + unit_repr='seconds/image', + desc='train speed on one GPU card') +train_acc_top1_card4_kpi = AccKpi( + 'train_acc_top1_card4', 0.05, 0, desc='TOP1 ACC') +train_acc_top5_card4_kpi = AccKpi( + 'train_acc_top5_card4', 0.05, 0, actived=False, desc='TOP5 ACC') +train_cost_card4_kpi = CostKpi( + 'train_cost_card4', 0.3, 0, actived=True, desc='train cost') +test_acc_top1_card4_kpi = AccKpi( + 'test_acc_top1_card4', 0.05, 0, desc='TOP1 ACC') +test_acc_top5_card4_kpi = AccKpi( + 'test_acc_top5_card4', 0.05, 0, actived=False, desc='TOP5 ACC') +test_cost_card4_kpi = CostKpi( + 'test_cost_card4', 1.0, 0, actived=True, desc='test cost') +train_speed_card4_kpi = AccKpi( + 'train_speed_card4', + 0.05, + 0, + actived=True, + unit_repr='seconds/image', + desc='train speed on four GPU cards') +tracking_kpis = [ + train_acc_top1_kpi, train_acc_top5_kpi, train_cost_kpi, test_acc_top1_kpi, + test_acc_top5_kpi, test_cost_kpi, train_speed_kpi, train_acc_top1_card4_kpi, + train_acc_top5_card4_kpi, train_cost_card4_kpi, test_acc_top1_card4_kpi, + test_acc_top5_card4_kpi, test_cost_card4_kpi, train_speed_card4_kpi +] + + +def parse_log(log): + ''' + This method should be implemented by model developers.
+ + The suggestion: + + each KPI line in the log should be 'kpis\t<kpi_name>\t<kpi_value>', for example: + + " + kpis\ttrain_cost\t1.0 + kpis\ttest_cost\t1.0 + kpis\ttrain_cost\t1.0 + kpis\ttrain_cost\t1.0 + kpis\ttrain_acc\t1.2 + " + ''' + for line in log.split('\n'): + fs = line.strip().split('\t') + print(fs) + if len(fs) == 3 and fs[0] == 'kpis': + print("-----%s" % fs) + kpi_name = fs[1] + kpi_value = float(fs[2]) + yield kpi_name, kpi_value + + +def log_to_ce(log): + kpi_tracker = {} + for kpi in tracking_kpis: + kpi_tracker[kpi.name] = kpi + + for (kpi_name, kpi_value) in parse_log(log): + print(kpi_name, kpi_value) + kpi_tracker[kpi_name].add_record(kpi_value) + kpi_tracker[kpi_name].persist() + + +if __name__ == '__main__': + log = sys.stdin.read() + print("*****") + print(log) + print("****") + log_to_ce(log) diff --git a/fluid/image_classification/models/se_resnext.py b/fluid/image_classification/models/se_resnext.py index 2cef2ef6bd09b3d46ac7533496b0d14e3513a5f8..272880c7cffd28f9ce09507b5ab931abb219d874 100644 --- a/fluid/image_classification/models/se_resnext.py +++ b/fluid/image_classification/models/se_resnext.py @@ -11,6 +11,7 @@ train_parameters = { "input_size": [3, 224, 224], "input_mean": [0.485, 0.456, 0.406], "input_std": [0.229, 0.224, 0.225], + "dropout_seed": None, "learning_strategy": { "name": "piecewise_decay", "batch_size": 256, @@ -101,7 +102,9 @@ class SE_ResNeXt(): pool = fluid.layers.pool2d( input=conv, pool_size=7, pool_type='avg', global_pooling=True) - drop = fluid.layers.dropout(x=pool, dropout_prob=0.5) + # Do not set the seed when training; it is only used for debugging. + drop = fluid.layers.dropout( + x=pool, dropout_prob=0.5, seed=self.params["dropout_seed"]) stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0) out = fluid.layers.fc(input=drop, size=class_dim, diff --git a/fluid/image_classification/train.py b/fluid/image_classification/train.py index 51bf9901246cb554baaef22a8e526d0ecd81bd0a..aed5802c8e75bb2a636b39f1627cd7333e9109cc 100644 --- a/fluid/image_classification/train.py +++ b/fluid/image_classification/train.py @@ -4,6 +4,7 @@ import time import sys import paddle import paddle.fluid as fluid +import paddle.dataset.flowers as flowers import models import reader import argparse @@ -28,6 +29,7 @@ add_arg('checkpoint', str, None, "Whether to resume chec add_arg('lr', float, 0.1, "set learning rate.") add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.") add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.") +add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.") # yapf: enable model_list = [m for m in dir(models) if "__" not in m] @@ -100,6 +102,9 @@ def train(args): # model definition model = models.__dict__[model_name]() + if args.enable_ce: + assert model_name == "SE_ResNeXt50_32x4d" + if model_name is "GoogleNet": out0, out1, out2 = model.net(input=image, class_dim=class_dim) cost0 = fluid.layers.cross_entropy(input=out0, label=label) @@ -129,6 +134,8 @@ def train(args): params["num_epochs"] = args.num_epochs params["learning_strategy"]["batch_size"] = args.batch_size params["learning_strategy"]["name"] = args.lr_strategy + if args.enable_ce: + params["dropout_seed"] = 10 # initialize optimizer optimizer = optimizer_setting(params) @@ -137,6 +144,9 @@ def train(args): if with_memory_optimization: fluid.memory_optimize(fluid.default_main_program()) + if args.enable_ce: + fluid.default_startup_program().random_seed = 1000 + place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place)
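+    # The startup program was seeded above for CE runs, so the
+    # exe.run(fluid.default_startup_program()) call below performs a
+    # deterministic parameter initialization.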
exe.run(fluid.default_startup_program()) @@ -153,8 +163,20 @@ def train(args): train_batch_size = args.batch_size test_batch_size = 16 - train_reader = paddle.batch(reader.train(), batch_size=train_batch_size) - test_reader = paddle.batch(reader.val(), batch_size=test_batch_size) + + if not args.enable_ce: + train_reader = paddle.batch(reader.train(), batch_size=train_batch_size) + test_reader = paddle.batch(reader.val(), batch_size=test_batch_size) + else: + # Use the flowers dataset for CE, with use_xmap=False to keep the data order deterministic. + # This is time consuming, however; a smaller dataset would be needed for faster runs. + import random + random.seed(0) + train_reader = paddle.batch( + flowers.train(use_xmap=False), batch_size=train_batch_size) + test_reader = paddle.batch( + flowers.test(use_xmap=False), batch_size=test_batch_size) + feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) train_exe = fluid.ParallelExecutor( @@ -162,9 +184,12 @@ fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name] + gpu = os.getenv("CUDA_VISIBLE_DEVICES") or "" + gpu_nums = len(gpu.split(",")) for pass_id in range(params["num_epochs"]): train_info = [[], [], []] test_info = [[], [], []] + train_time = [] for batch_id, data in enumerate(train_reader()): t1 = time.time() loss, acc1, acc5 = train_exe.run(fetch_list, feed=feeder.feed(data)) @@ -176,6 +201,7 @@ def train(args): train_info[0].append(loss) train_info[1].append(acc1) train_info[2].append(acc5) + train_time.append(period) if batch_id % 10 == 0: print("Pass {0}, trainbatch {1}, loss {2}, \ acc1 {3}, acc5 {4} time {5}" @@ -187,6 +213,7 @@ def train(args): train_loss = np.array(train_info[0]).mean() train_acc1 = np.array(train_info[1]).mean() train_acc5 = np.array(train_info[2]).mean() + train_speed = np.array(train_time).mean() / train_batch_size cnt = 0 for test_batch_id, data in enumerate(test_reader()): t1 = time.time() @@ -226,6 +253,36 @@ def train(args): os.makedirs(model_path) fluid.io.save_persistables(exe, model_path) + # This is for continuous evaluation only + if args.enable_ce and pass_id == args.num_epochs - 1: + if gpu_nums == 1: + # Use the mean cost/acc of the final pass for training + print("kpis train_cost %s" % train_loss) + print("kpis train_acc_top1 %s" % train_acc1) + print("kpis train_acc_top5 %s" % train_acc5) + # Use the mean cost/acc for testing + print("kpis test_cost %s" % test_loss) + print("kpis test_acc_top1 %s" % test_acc1) + print("kpis test_acc_top5 %s" % test_acc5) + print("kpis train_speed %s" % train_speed) + else: + # Use the mean cost/acc of the final pass for training + print("kpis train_cost_card%s %s" % + (gpu_nums, train_loss)) + print("kpis train_acc_top1_card%s %s" % + (gpu_nums, train_acc1)) + print("kpis train_acc_top5_card%s %s" % + (gpu_nums, train_acc5)) + # Use the mean cost/acc for testing + print("kpis test_cost_card%s %s" % + (gpu_nums, test_loss)) + print("kpis test_acc_top1_card%s %s" % + (gpu_nums, test_acc1)) + print("kpis test_acc_top5_card%s %s" % + (gpu_nums, test_acc5)) + print("kpis train_speed_card%s %s" % + (gpu_nums, train_speed)) + def main(): args = parser.parse_args() diff --git a/fluid/language_model/train.py b/fluid/language_model/train.py index f3e7a7398bf13e14c74ce1d10d90b7bf34031698..e719f8f107852f79972c1ad59b7388b52295691e 100644 --- a/fluid/language_model/train.py +++ b/fluid/language_model/train.py @@ -145,7 +145,7 @@ def train(train_reader, if pass_idx == pass_num - 1 and args.enable_ce: #Note: The following logs are special for CE monitoring.
#Other situations do not need to care about these logs. - gpu_num = get_cards() + gpu_num = get_cards(args.enable_ce) if gpu_num == 1: print("kpis imikolov_20_pass_duration %s" % (total_time / epoch_idx)) diff --git a/fluid/neural_machine_translation/transformer/.run_ce.sh b/fluid/neural_machine_translation/transformer/.run_ce.sh new file mode 100644 index 0000000000000000000000000000000000000000..e37856e5055ac3689416355ca9b29e6b8911598f --- /dev/null +++ b/fluid/neural_machine_translation/transformer/.run_ce.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +DATA_PATH=$HOME/.cache/paddle/dataset/wmt16 +if [ ! -d $DATA_PATH ] ; then + python -c 'import paddle;paddle.dataset.wmt16.train(10000, 10000, "en")'\ '().next()' + tar -zxf $DATA_PATH/wmt16.tar.gz -C $DATA_PATH fi + +train(){ + python -u train.py \ + --src_vocab_fpath $DATA_PATH/en_10000.dict \ + --trg_vocab_fpath $DATA_PATH/de_10000.dict \ + --special_token '<s>' '<e>' '<unk>' \ + --train_file_pattern $DATA_PATH/wmt16/train \ + --val_file_pattern $DATA_PATH/wmt16/val \ + --use_token_batch True \ + --batch_size 2048 \ + --sort_type pool \ + --pool_size 10000 \ + --enable_ce True \ + weight_sharing False \ + pass_num 20 \ + dropout_seed 10 } + +train | python _ce.py diff --git a/fluid/neural_machine_translation/transformer/_ce.py b/fluid/neural_machine_translation/transformer/_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..b774af075b028f4afafeb575f65cc8c9de8dc96b --- /dev/null +++ b/fluid/neural_machine_translation/transformer/_ce.py @@ -0,0 +1,60 @@ +#### This file is only used for the continuous evaluation test! + +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi, DurationKpi, AccKpi + +#### NOTE: kpi.py should be shared across models in some way! + +train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True) +test_cost_kpi = CostKpi('test_cost', 0.005, 0, actived=True) +train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=True) + +tracking_kpis = [ + train_cost_kpi, + test_cost_kpi, + train_duration_kpi, ] + + +def parse_log(log): + ''' + This method should be implemented by model developers. + The suggestion: + each KPI line in the log should be 'kpis\t<kpi_name>\t<kpi_value>', for example: + " + kpis\ttrain_cost\t1.0 + kpis\ttest_cost\t1.0 + kpis\ttrain_cost\t1.0 + kpis\ttrain_cost\t1.0 + kpis\ttrain_acc\t1.2 + " + ''' + for line in log.split('\n'): + fs = line.strip().split('\t') + print(fs) + if len(fs) == 3 and fs[0] == 'kpis': + print("-----%s" % fs) + kpi_name = fs[1] + kpi_value = float(fs[2]) + yield kpi_name, kpi_value + + +def log_to_ce(log): + kpi_tracker = {} + for kpi in tracking_kpis: + kpi_tracker[kpi.name] = kpi + + for (kpi_name, kpi_value) in parse_log(log): + print(kpi_name, kpi_value) + kpi_tracker[kpi_name].add_record(kpi_value) + kpi_tracker[kpi_name].persist() + + +if __name__ == '__main__': + log = sys.stdin.read() + print("*****") + print(log) + print("****") + log_to_ce(log) \ No newline at end of file diff --git a/fluid/neural_machine_translation/transformer/config.py b/fluid/neural_machine_translation/transformer/config.py index e68ab17e69eff890cb8e6b028ead5e6163213761..64068cf9d9e2a550842049f18bea7f19038b9fbf 100644 --- a/fluid/neural_machine_translation/transformer/config.py +++ b/fluid/neural_machine_translation/transformer/config.py @@ -81,6 +81,8 @@ class ModelHyperParams(object): n_layer = 6 # dropout rate used by all dropout layers. dropout = 0.1 + # random seed used in dropout for CE. + dropout_seed = None # the flag indicating whether to share embedding and softmax weights.
# vocabularies in source and target should be same for weight sharing. weight_sharing = True diff --git a/fluid/neural_machine_translation/transformer/model.py b/fluid/neural_machine_translation/transformer/model.py index 46c9f7a9065765b1e5ab5fa4d66042fc3312f75a..b4cca6a6bc0d6b58139e3f170977b03c22faaeee 100644 --- a/fluid/neural_machine_translation/transformer/model.py +++ b/fluid/neural_machine_translation/transformer/model.py @@ -111,7 +111,10 @@ def multi_head_attention(queries, x=weights, shape=product.shape, actual_shape=post_softmax_shape) if dropout_rate: weights = layers.dropout( - weights, dropout_prob=dropout_rate, is_test=False) + weights, + dropout_prob=dropout_rate, + seed=ModelHyperParams.dropout_seed, + is_test=False) out = layers.matmul(weights, v) return out @@ -171,7 +174,10 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.): elif cmd == "d": # add dropout if dropout_rate: out = layers.dropout( - out, dropout_prob=dropout_rate, is_test=False) + out, + dropout_prob=dropout_rate, + seed=ModelHyperParams.dropout_seed, + is_test=False) return out @@ -211,7 +217,9 @@ def prepare_encoder(src_word, shape=[batch_size, seq_len, src_emb_dim], actual_shape=src_data_shape) return layers.dropout( - enc_input, dropout_prob=dropout_rate, + enc_input, + dropout_prob=dropout_rate, + seed=ModelHyperParams.dropout_seed, is_test=False) if dropout_rate else enc_input diff --git a/fluid/neural_machine_translation/transformer/train.py b/fluid/neural_machine_translation/transformer/train.py index 5175c48e62aa6cc480e766478a5be154791c362e..beb2b7d3e6e6db09ac9269cd285f71446bb69270 100644 --- a/fluid/neural_machine_translation/transformer/train.py +++ b/fluid/neural_machine_translation/transformer/train.py @@ -103,6 +103,12 @@ def parse_args(): help="The device type.") parser.add_argument( '--sync', type=ast.literal_eval, default=True, help="sync mode.") + parser.add_argument( + "--enable_ce", + type=ast.literal_eval, + default=True, + help="The flag indicating whether to run the task " + "for continuous evaluation.") args = parser.parse_args() # Append args related to dict @@ -382,6 +388,12 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, data_input_names, util_input_names, sum_cost, token_num) + # the best cross-entropy value with label smoothing + loss_normalizer = -((1. - TrainTaskConfig.label_smooth_eps) * np.log( + (1. - TrainTaskConfig.label_smooth_eps + )) + TrainTaskConfig.label_smooth_eps * + np.log(TrainTaskConfig.label_smooth_eps / ( + ModelHyperParams.trg_vocab_size - 1) + 1e-20)) init = False for pass_id in xrange(TrainTaskConfig.pass_num): pass_start_time = time.time() @@ -421,19 +433,27 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, ) # sum the cost from multi-devices total_token_num = token_num_val.sum() total_avg_cost = total_sum_cost / total_token_num - print("epoch: %d, batch: %d, sum loss: %f, avg loss: %f, ppl: %f" % - (pass_id, batch_id, total_sum_cost, total_avg_cost, - np.exp([min(total_avg_cost, 100)]))) + print("epoch: %d, batch: %d, avg loss: %f, normalized loss: %f," + " ppl: %f" % (pass_id, batch_id, total_avg_cost, + total_avg_cost - loss_normalizer, + np.exp([min(total_avg_cost, 100)]))) if batch_id > 0 and batch_id % 1000 == 0: fluid.io.save_persistables( exe, os.path.join(TrainTaskConfig.ckpt_dir, "latest.checkpoint")) init = True + + time_consumed = time.time() - pass_start_time # Validate and save the model for inference. 
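+        # 'normalized loss' in the logs below is the average loss minus
+        # loss_normalizer (the label-smoothing cross-entropy floor computed
+        # above), so a value near zero means the model is close to that floor.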
- print("epoch: %d, " % pass_id + - ("val avg loss: %f, val ppl: %f, " % test() - if args.val_file_pattern is not None else "") + "consumed %fs" % - (time.time() - pass_start_time)) + if args.val_file_pattern is not None: + val_avg_cost, val_ppl = test() + print( + "epoch: %d, val avg loss: %f, val normalized loss: %f, val ppl: %f," + " consumed %fs" % (pass_id, val_avg_cost, + val_avg_cost - loss_normalizer, val_ppl, + time_consumed)) + else: + print("epoch: %d, consumed %fs" % (pass_id, time_consumed)) fluid.io.save_persistables( exe, os.path.join(TrainTaskConfig.ckpt_dir, @@ -442,6 +462,10 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler, os.path.join(TrainTaskConfig.model_dir, "pass_" + str(pass_id) + ".infer.model"), data_input_names[:-2] + util_input_names, [predict], exe) + if args.enable_ce: # For CE + print("kpis\ttrain_cost\t%f" % total_avg_cost) + print("kpis\ttest_cost\t%f" % val_avg_cost) + print("kpis\ttrain_duration\t%f" % time_consumed) def train(args): @@ -465,6 +489,9 @@ def train(args): exe = fluid.Executor(place) + if args.enable_ce: + fluid.default_startup_program().random_seed = 1000 + sum_cost, avg_cost, predict, token_num = transformer( ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1, ModelHyperParams.n_layer, diff --git a/fluid/neural_machine_translation_transformer b/fluid/neural_machine_translation_transformer new file mode 120000 index 0000000000000000000000000000000000000000..2420ba519357fc22e19220b6f534f0f2e20ccee5 --- /dev/null +++ b/fluid/neural_machine_translation_transformer @@ -0,0 +1 @@ +./neural_machine_translation/transformer \ No newline at end of file diff --git a/fluid/object_detection/train.py b/fluid/object_detection/train.py index aadcc904f55f077c06630a1f8e27a6bf4b422c05..46af235ff7f6c3067e1cc2d35de76ebaf59be885 100644 --- a/fluid/object_detection/train.py +++ b/fluid/object_detection/train.py @@ -193,13 +193,16 @@ def train(args, total_time += end_time - start_time train_avg_loss = np.mean(every_pass_loss) if devices_num == 1: - print ("kpis train_cost %s" % train_avg_loss) - print ("kpis test_acc %s" % mean_map) - print ("kpis train_speed %s" % (total_time / epoch_idx)) + print ("kpis train_cost %s" % train_avg_loss) + print ("kpis test_acc %s" % mean_map) + print ("kpis train_speed %s" % (total_time / epoch_idx)) else: - print ("kpis train_cost_card%s %s" % (devices_num, train_avg_loss)) - print ("kpis test_acc_card%s %s" % (devices_num, mean_map)) - print ("kpis train_speed_card%s %f" % (devices_num, total_time / epoch_idx)) + print ("kpis train_cost_card%s %s" % + (devices_num, train_avg_loss)) + print ("kpis test_acc_card%s %s" % + (devices_num, mean_map)) + print ("kpis train_speed_card%s %f" % + (devices_num, total_time / epoch_idx)) if pass_id % 10 == 0 or pass_id == num_passes - 1: diff --git a/fluid/sequence_tagging_for_ner/.run_ce.sh b/fluid/sequence_tagging_for_ner/.run_ce.sh new file mode 100755 index 0000000000000000000000000000000000000000..e4d1c58bcc369e3164cb16b9f715df2775912762 --- /dev/null +++ b/fluid/sequence_tagging_for_ner/.run_ce.sh @@ -0,0 +1,6 @@ +###!/bin/bash +####This file is only used for continuous evaluation. 
+ +export CE_MODE_X=1 +sh data/download.sh +python train.py | python _ce.py diff --git a/fluid/sequence_tagging_for_ner/README.md b/fluid/sequence_tagging_for_ner/README.md index 1f634da4e2e385b06589cde0c6979812ff52e450..ff7b184d8da56c85edf7b34c4dca1cd41a4cc3a3 100644 --- a/fluid/sequence_tagging_for_ner/README.md +++ b/fluid/sequence_tagging_for_ner/README.md @@ -22,11 +22,7 @@ ## Data Acquisition -Please refer to the data acquisition steps in the PaddlePaddle v2 [Named Entity Recognition](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md) example: copy the data folder from that example into this example's directory, then run its download.sh script to obtain the training and test data. - -## Obtaining the Common Scripts - -Please copy the data-reading file [reader.py](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/reader.py) and the file [utils.py](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/utils.py), which contains common utilities such as dictionary loading, both provided by the PaddlePaddle v2 [Named Entity Recognition](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md) example, into this directory. This example uses both scripts. +For the complete dataset, please refer to the approach in the PaddlePaddle v2 [Named Entity Recognition](https://github.com/PaddlePaddle/models/blob/develop/sequence_tagging_for_ner/README.md) example. The sample data for this example can also be obtained by running data/download.sh. ## Training diff --git a/fluid/sequence_tagging_for_ner/_ce.py b/fluid/sequence_tagging_for_ner/_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..111a4d566b1cb69543bd7747cd76568f7de4b94c --- /dev/null +++ b/fluid/sequence_tagging_for_ner/_ce.py @@ -0,0 +1,48 @@ +#### This file is only used for the continuous evaluation test! + +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi, DurationKpi, AccKpi + +#### NOTE: kpi.py should be shared across models in some way! + +train_acc_kpi = AccKpi('train_precision', 0.005, actived=True) +test_acc_kpi = AccKpi('test_precision', 0.005, actived=True) +train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True) + +tracking_kpis = [ + train_acc_kpi, + test_acc_kpi, + train_duration_kpi, ] + + +def parse_log(log): + for line in log.split('\n'): + fs = line.strip().split('\t') + print(fs) + if len(fs) == 3 and fs[0] == 'kpis': + print("-----%s" % fs) + kpi_name = fs[1] + kpi_value = float(fs[2]) + yield kpi_name, kpi_value + + +def log_to_ce(log): + kpi_tracker = {} + for kpi in tracking_kpis: + kpi_tracker[kpi.name] = kpi + + for (kpi_name, kpi_value) in parse_log(log): + print(kpi_name, kpi_value) + kpi_tracker[kpi_name].add_record(kpi_value) + kpi_tracker[kpi_name].persist() + + +if __name__ == '__main__': + log = sys.stdin.read() + print("*****") + print(log) + print("****") + log_to_ce(log) diff --git a/fluid/sequence_tagging_for_ner/data/download.sh b/fluid/sequence_tagging_for_ner/data/download.sh new file mode 100755 index 0000000000000000000000000000000000000000..e61df4d3b11a5c39fed22031a86f0030318adaf9 --- /dev/null +++ b/fluid/sequence_tagging_for_ner/data/download.sh @@ -0,0 +1,17 @@ +if [ -f assignment2.zip ]; then + echo "data already exists" + exit 0 +else + wget http://cs224d.stanford.edu/assignment2/assignment2.zip +fi + +if [ $? -eq 0 ];then + unzip assignment2.zip + cp assignment2_release/data/ner/wordVectors.txt ./data + cp assignment2_release/data/ner/vocab.txt ./data + rm -rf assignment2_release +else + echo "download data error!"
>> /dev/stderr + exit 1 +fi + diff --git a/fluid/sequence_tagging_for_ner/data/target.txt b/fluid/sequence_tagging_for_ner/data/target.txt new file mode 100644 index 0000000000000000000000000000000000000000..e0fa4d8f6654be07b4d1188750abb861d7c6f264 --- /dev/null +++ b/fluid/sequence_tagging_for_ner/data/target.txt @@ -0,0 +1,9 @@ +B-LOC +I-LOC +B-MISC +I-MISC +B-ORG +I-ORG +B-PER +I-PER +O diff --git a/fluid/sequence_tagging_for_ner/data/test b/fluid/sequence_tagging_for_ner/data/test new file mode 100644 index 0000000000000000000000000000000000000000..66163e1a869d57303117dd94d59ff01be05de8f7 --- /dev/null +++ b/fluid/sequence_tagging_for_ner/data/test @@ -0,0 +1,128 @@ +CRICKET NNP I-NP O +- : O O +LEICESTERSHIRE NNP I-NP I-ORG +TAKE NNP I-NP O +OVER IN I-PP O +AT NNP I-NP O +TOP NNP I-NP O +AFTER NNP I-NP O +INNINGS NNP I-NP O +VICTORY NN I-NP O +. . O O + +LONDON NNP I-NP I-LOC +1996-08-30 CD I-NP O + +West NNP I-NP I-MISC +Indian NNP I-NP I-MISC +all-rounder NN I-NP O +Phil NNP I-NP I-PER +Simmons NNP I-NP I-PER +took VBD I-VP O +four CD I-NP O +for IN I-PP O +38 CD I-NP O +on IN I-PP O +Friday NNP I-NP O +as IN I-PP O +Leicestershire NNP I-NP I-ORG +beat VBD I-VP O +Somerset NNP I-NP I-ORG +by IN I-PP O +an DT I-NP O +innings NN I-NP O +and CC O O +39 CD I-NP O +runs NNS I-NP O +in IN I-PP O +two CD I-NP O +days NNS I-NP O +to TO I-VP O +take VB I-VP O +over IN I-PP O +at IN B-PP O +the DT I-NP O +head NN I-NP O +of IN I-PP O +the DT I-NP O +county NN I-NP O +championship NN I-NP O +. . O O + +Their PRP$ I-NP O +stay NN I-NP O +on IN I-PP O +top NN I-NP O +, , O O +though RB I-ADVP O +, , O O +may MD I-VP O +be VB I-VP O +short-lived JJ I-ADJP O +as IN I-PP O +title NN I-NP O +rivals NNS I-NP O +Essex NNP I-NP I-ORG +, , O O +Derbyshire NNP I-NP I-ORG +and CC I-NP O +Surrey NNP I-NP I-ORG +all DT O O +closed VBD I-VP O +in RP I-PRT O +on IN I-PP O +victory NN I-NP O +while IN I-SBAR O +Kent NNP I-NP I-ORG +made VBD I-VP O +up RP I-PRT O +for IN I-PP O +lost VBN I-NP O +time NN I-NP O +in IN I-PP O +their PRP$ I-NP O +rain-affected JJ I-NP O +match NN I-NP O +against IN I-PP O +Nottinghamshire NNP I-NP I-ORG +. . O O + +After IN I-PP O +bowling VBG I-NP O +Somerset NNP I-NP I-ORG +out RP I-PRT O +for IN I-PP O +83 CD I-NP O +on IN I-PP O +the DT I-NP O +opening NN I-NP O +morning NN I-NP O +at IN I-PP O +Grace NNP I-NP I-LOC +Road NNP I-NP I-LOC +, , O O +Leicestershire NNP I-NP I-ORG +extended VBD I-VP O +their PRP$ I-NP O +first JJ I-NP O +innings NN I-NP O +by IN I-PP O +94 CD I-NP O +runs VBZ I-VP O +before IN I-PP O +being VBG I-VP O +bowled VBD I-VP O +out RP I-PRT O +for IN I-PP O +296 CD I-NP O +with IN I-PP O +England NNP I-NP I-LOC +discard VBP I-VP O +Andy NNP I-NP I-PER +Caddick NNP I-NP I-PER +taking VBG I-VP O +three CD I-NP O +for IN I-PP O +83 CD I-NP O +. . O O + diff --git a/fluid/sequence_tagging_for_ner/data/train b/fluid/sequence_tagging_for_ner/data/train new file mode 100644 index 0000000000000000000000000000000000000000..cbf3e678c555a3b6db26fd14e38889f040f048ca --- /dev/null +++ b/fluid/sequence_tagging_for_ner/data/train @@ -0,0 +1,139 @@ +EU NNP I-NP I-ORG +rejects VBZ I-VP O +German JJ I-NP I-MISC +call NN I-NP O +to TO I-VP O +boycott VB I-VP O +British JJ I-NP I-MISC +lamb NN I-NP O +. . 
O O + +Peter NNP I-NP I-PER +Blackburn NNP I-NP I-PER + +BRUSSELS NNP I-NP I-LOC +1996-08-22 CD I-NP O + +The DT I-NP O +European NNP I-NP I-ORG +Commission NNP I-NP I-ORG +said VBD I-VP O +on IN I-PP O +Thursday NNP I-NP O +it PRP B-NP O +disagreed VBD I-VP O +with IN I-PP O +German JJ I-NP I-MISC +advice NN I-NP O +to TO I-PP O +consumers NNS I-NP O +to TO I-VP O +shun VB I-VP O +British JJ I-NP I-MISC +lamb NN I-NP O +until IN I-SBAR O +scientists NNS I-NP O +determine VBP I-VP O +whether IN I-SBAR O +mad JJ I-NP O +cow NN I-NP O +disease NN I-NP O +can MD I-VP O +be VB I-VP O +transmitted VBN I-VP O +to TO I-PP O +sheep NN I-NP O +. . O O + +Germany NNP I-NP I-LOC +'s POS B-NP O +representative NN I-NP O +to TO I-PP O +the DT I-NP O +European NNP I-NP I-ORG +Union NNP I-NP I-ORG +'s POS B-NP O +veterinary JJ I-NP O +committee NN I-NP O +Werner NNP I-NP I-PER +Zwingmann NNP I-NP I-PER +said VBD I-VP O +on IN I-PP O +Wednesday NNP I-NP O +consumers NNS I-NP O +should MD I-VP O +buy VB I-VP O +sheepmeat NN I-NP O +from IN I-PP O +countries NNS I-NP O +other JJ I-ADJP O +than IN I-PP O +Britain NNP I-NP I-LOC +until IN I-SBAR O +the DT I-NP O +scientific JJ I-NP O +advice NN I-NP O +was VBD I-VP O +clearer JJR I-ADJP O +. . O O + +" " O O +We PRP I-NP O +do VBP I-VP O +n't RB I-VP O +support VB I-VP O +any DT I-NP O +such JJ I-NP O +recommendation NN I-NP O +because IN I-SBAR O +we PRP I-NP O +do VBP I-VP O +n't RB I-VP O +see VB I-VP O +any DT I-NP O +grounds NNS I-NP O +for IN I-PP O +it PRP I-NP O +, , O O +" " O O +the DT I-NP O +Commission NNP I-NP I-ORG +'s POS B-NP O +chief JJ I-NP O +spokesman NN I-NP O +Nikolaus NNP I-NP I-PER +van NNP I-NP I-PER +der FW I-NP I-PER +Pas NNP I-NP I-PER +told VBD I-VP O +a DT I-NP O +news NN I-NP O +briefing NN I-NP O +. . O O + +He PRP I-NP O +said VBD I-VP O +further JJ I-NP O +scientific JJ I-NP O +study NN I-NP O +was VBD I-VP O +required VBN I-VP O +and CC O O +if IN I-SBAR O +it PRP I-NP O +was VBD I-VP O +found VBN I-VP O +that IN I-SBAR O +action NN I-NP O +was VBD I-VP O +needed VBN I-VP O +it PRP I-NP O +should MD I-VP O +be VB I-VP O +taken VBN I-VP O +by IN I-PP O +the DT I-NP O +European NNP I-NP I-ORG +Union NNP I-NP I-ORG +. . O O + diff --git a/fluid/sequence_tagging_for_ner/reader.py b/fluid/sequence_tagging_for_ner/reader.py new file mode 100644 index 0000000000000000000000000000000000000000..5050d0bf499e59db505758b0af9eed71e6af7de7 --- /dev/null +++ b/fluid/sequence_tagging_for_ner/reader.py @@ -0,0 +1,66 @@ +""" +Conll03 dataset. +""" + +from utils import * + +__all__ = ["data_reader"] + + +def canonicalize_digits(word): + if any([c.isalpha() for c in word]): return word + word = re.sub("\d", "DG", word) + if word.startswith("DG"): + word = word.replace(",", "") # remove thousands separator + return word + + +def canonicalize_word(word, wordset=None, digits=True): + word = word.lower() + if digits: + if (wordset != None) and (word in wordset): return word + word = canonicalize_digits(word) # try to canonicalize numbers + if (wordset == None) or (word in wordset): return word + else: return "UUUNKKK" # unknown token + + +def data_reader(data_file, word_dict, label_dict): + """ + The dataset can be obtained according to http://www.clips.uantwerpen.be/conll2003/ner/. + It returns a reader creator, each sample in the reader includes: + word id sequence, label id sequence and raw sentence. 
+ + :return: reader creator + :rtype: callable + """ + + def reader(): + UNK_IDX = word_dict["UUUNKKK"] + + sentence = [] + labels = [] + with open(data_file, "r") as f: + for line in f: + if len(line.strip()) == 0: + if len(sentence) > 0: + word_idx = [ + word_dict.get( + canonicalize_word(w, word_dict), UNK_IDX) + for w in sentence + ] + mark = [1 if w[0].isupper() else 0 for w in sentence] + label_idx = [label_dict[l] for l in labels] + yield word_idx, mark, label_idx + sentence = [] + labels = [] + else: + segs = line.strip().split() + sentence.append(segs[0]) + # transform I-TYPE to BIO schema + if segs[-1] != "O" and (len(labels) == 0 or + labels[-1][1:] != segs[-1][1:]): + labels.append("B" + segs[-1][1:]) + else: + labels.append(segs[-1]) + + return reader diff --git a/fluid/sequence_tagging_for_ner/train.py b/fluid/sequence_tagging_for_ner/train.py index 6ed77cd5ca1d504a8b79b4f87349242b5051c539..7a6e992df459157d0a74839d1e3fbef7213a4b14 100644 --- a/fluid/sequence_tagging_for_ner/train.py +++ b/fluid/sequence_tagging_for_ner/train.py @@ -1,8 +1,9 @@ import os import math +import time import numpy as np -import paddle.v2 as paddle +import paddle import paddle.fluid as fluid import reader @@ -24,12 +25,19 @@ def test(exe, chunk_evaluator, inference_program, test_data, place): return chunk_evaluator.eval(exe) -def main(train_data_file, test_data_file, vocab_file, target_file, emb_file, - model_save_dir, num_passes, use_gpu, parallel): +def main(train_data_file, + test_data_file, + vocab_file, + target_file, + emb_file, + model_save_dir, + num_passes, + use_gpu, + parallel, + batch_size=200): if not os.path.exists(model_save_dir): os.mkdir(model_save_dir) - BATCH_SIZE = 200 word_dict = load_dict(vocab_file) label_dict = load_dict(target_file) @@ -53,60 +61,76 @@ def main(train_data_file, test_data_file, vocab_file, target_file, emb_file, chunk_scheme="IOB", num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0))) - inference_program = fluid.default_main_program().clone() + inference_program = fluid.default_main_program().clone(for_test=True) with fluid.program_guard(inference_program): test_target = chunk_evaluator.metrics + chunk_evaluator.states inference_program = fluid.io.get_inference_program(test_target) - train_reader = paddle.batch( - paddle.reader.shuffle( + if "CE_MODE_X" not in os.environ: + train_reader = paddle.batch( + paddle.reader.shuffle( + reader.data_reader(train_data_file, word_dict, label_dict), + buf_size=20000), + batch_size=batch_size) + test_reader = paddle.batch( + paddle.reader.shuffle( + reader.data_reader(test_data_file, word_dict, label_dict), + buf_size=20000), + batch_size=batch_size) + else: + train_reader = paddle.batch( reader.data_reader(train_data_file, word_dict, label_dict), - buf_size=20000), - batch_size=BATCH_SIZE) - test_reader = paddle.batch( - paddle.reader.shuffle( + batch_size=batch_size) + test_reader = paddle.batch( reader.data_reader(test_data_file, word_dict, label_dict), - buf_size=20000), - batch_size=BATCH_SIZE) + batch_size=batch_size) place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() feeder = fluid.DataFeeder(feed_list=[word, mark, target], place=place) exe = fluid.Executor(place) + if "CE_MODE_X" in os.environ: + fluid.default_startup_program().random_seed = 110 exe.run(fluid.default_startup_program()) embedding_name = 'emb' embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor() embedding_param.set(word_vector_values, place) - batch_id = 0 for pass_id in xrange(num_passes): 
chunk_evaluator.reset(exe) - for data in train_reader(): + for batch_id, data in enumerate(train_reader()): cost, batch_precision, batch_recall, batch_f1_score = exe.run( fluid.default_main_program(), feed=feeder.feed(data), fetch_list=[avg_cost] + chunk_evaluator.metrics) if batch_id % 5 == 0: + print(cost) print("Pass " + str(pass_id) + ", Batch " + str( batch_id) + ", Cost " + str(cost[0]) + ", Precision " + str( batch_precision[0]) + ", Recall " + str(batch_recall[0]) + ", F1_score" + str(batch_f1_score[0])) - batch_id = batch_id + 1 pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(exe) print("[TrainSet] pass_id:" + str(pass_id) + " pass_precision:" + str( pass_precision) + " pass_recall:" + str(pass_recall) + " pass_f1_score:" + str(pass_f1_score)) - pass_precision, pass_recall, pass_f1_score = test( + test_pass_precision, test_pass_recall, test_pass_f1_score = test( exe, chunk_evaluator, inference_program, test_reader, place) print("[TestSet] pass_id:" + str(pass_id) + " pass_precision:" + str( - pass_precision) + " pass_recall:" + str(pass_recall) + - " pass_f1_score:" + str(pass_f1_score)) + test_pass_precision) + " pass_recall:" + str(test_pass_recall) + + " pass_f1_score:" + str(test_pass_f1_score)) save_dirname = os.path.join(model_save_dir, "params_pass_%d" % pass_id) fluid.io.save_inference_model(save_dirname, ['word', 'mark', 'target'], - [crf_decode], exe) + crf_decode, exe) + + if ("CE_MODE_X" in os.environ) and (pass_id % 50 == 0): + if pass_id > 0: + print("kpis train_precision %f" % pass_precision) + print("kpis test_precision %f" % test_pass_precision) + print("kpis train_duration %f" % (time.time() - time_begin)) + time_begin = time.time() if __name__ == "__main__": @@ -118,5 +142,6 @@ emb_file="data/wordVectors.txt", model_save_dir="models", num_passes=1000, + batch_size=1, use_gpu=False, parallel=False) diff --git a/fluid/sequence_tagging_for_ner/utils.py b/fluid/sequence_tagging_for_ner/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f40f1bb19481e34288ede7247f4fbe827be6f590 --- /dev/null +++ b/fluid/sequence_tagging_for_ner/utils.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import logging +import os +import re +import argparse +import numpy as np +from collections import defaultdict + +logger = logging.getLogger("paddle") +logger.setLevel(logging.INFO) + + +def get_embedding(emb_file='data/wordVectors.txt'): + """ + Get the trained word vector. + """ + return np.loadtxt(emb_file, dtype=float) + + +def load_dict(dict_path): + """ + Load the word dictionary from the given file. + Each line of the given file is a word, which can include multiple columns + separated by tab. + + This function takes the first column (columns in a line are separated by + tab) as the key and takes the line number of the line as the value (the + index of the word in the dictionary). + """ + + return dict((line.strip().split("\t")[0], idx) + for idx, line in enumerate(open(dict_path, "r").readlines())) + + +def load_reverse_dict(dict_path): + """ + Load the word dictionary from the given file. + Each line of the given file is a word, which can include multiple columns + separated by tab. + + This function takes the line number of a line as the key (the index of the + word in the dictionary) and the first column (columns in a line are + separated by tab) as the value.
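+
+    For example, if the first line of the dictionary file starts with "EU",
+    load_dict above maps "EU" to 0, while this function maps 0 to "EU".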
+ """ + return dict((idx, line.strip().split("\t")[0]) + for idx, line in enumerate(open(dict_path, "r").readlines())) diff --git a/fluid/text_classification/.run_ce.sh b/fluid/text_classification/.run_ce.sh new file mode 100755 index 0000000000000000000000000000000000000000..777ba02e8c78760452a6fb7d4ac1dc4a82d62594 --- /dev/null +++ b/fluid/text_classification/.run_ce.sh @@ -0,0 +1,5 @@ +###!/bin/bash +####This file is only used for continuous evaluation. + +export CE_MODE_X=1 +python train.py cnn | python _ce.py diff --git a/fluid/text_classification/_ce.py b/fluid/text_classification/_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..100357204db7f3a8d0c1d3cbcbdc707410b20023 --- /dev/null +++ b/fluid/text_classification/_ce.py @@ -0,0 +1,48 @@ +####this file is only used for continuous evaluation test! + +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi, DurationKpi, AccKpi + +#### NOTE kpi.py should shared in models in some way!!!! + +train_acc_kpi = AccKpi('train_acc', 0.005, actived=True) +train_cost_kpi = CostKpi('train_cost', 0.005, actived=True) +train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True) + +tracking_kpis = [ + train_acc_kpi, + train_cost_kpi, + train_duration_kpi, +] + + +def parse_log(log): + for line in log.split('\n'): + fs = line.strip().split('\t') + print(fs) + if len(fs) == 3 and fs[0] == 'kpis': + print("-----%s" % fs) + kpi_name = fs[1] + kpi_value = float(fs[2]) + yield kpi_name, kpi_value + + +def log_to_ce(log): + kpi_tracker = {} + for kpi in tracking_kpis: + kpi_tracker[kpi.name] = kpi + + for (kpi_name, kpi_value) in parse_log(log): + print(kpi_name, kpi_value) + kpi_tracker[kpi_name].add_record(kpi_value) + kpi_tracker[kpi_name].persist() + + +if __name__ == '__main__': + log = sys.stdin.read() + print("*****") + print(log) + print("****") + log_to_ce(log) diff --git a/fluid/text_classification/clouds/scdb_parallel_executor.py b/fluid/text_classification/clouds/scdb_parallel_executor.py index 042f02b53fc1ba9d29563a91d7da3643790a22e9..9d7722e9776d11c591f1ff0bd97b3e295205d300 100644 --- a/fluid/text_classification/clouds/scdb_parallel_executor.py +++ b/fluid/text_classification/clouds/scdb_parallel_executor.py @@ -1,7 +1,7 @@ import unittest import contextlib +import paddle import paddle.fluid as fluid -import paddle.v2 as paddle import numpy as np import sys import time diff --git a/fluid/text_classification/clouds/scdb_single_card.py b/fluid/text_classification/clouds/scdb_single_card.py index 490c4f3791c1566cc67951f5098cbef8ab171b59..9cc39269913ab97341e5389b31ad9a5da2e8af51 100644 --- a/fluid/text_classification/clouds/scdb_single_card.py +++ b/fluid/text_classification/clouds/scdb_single_card.py @@ -1,7 +1,7 @@ import unittest import contextlib +import paddle import paddle.fluid as fluid -import paddle.v2 as paddle import numpy as np import sys import time diff --git a/fluid/text_classification/infer.py b/fluid/text_classification/infer.py index d2a0363d786866a92195dba8b490287b3ca9bc9d..923dd98dc27f6a4c5e369f30d9aca49e63aea7a5 100644 --- a/fluid/text_classification/infer.py +++ b/fluid/text_classification/infer.py @@ -4,8 +4,8 @@ import unittest import contextlib import numpy as np +import paddle import paddle.fluid as fluid -import paddle.v2 as paddle import utils diff --git a/fluid/text_classification/nets.py b/fluid/text_classification/nets.py index 98028c871a83cafe6d1de7b545f333c4581e0a40..6ba637dd087afd8e45ad8d0752ac9850ec49e627 100644 --- 
a/fluid/text_classification/nets.py +++ b/fluid/text_classification/nets.py @@ -2,8 +2,8 @@ import sys import time import numpy as np +import paddle import paddle.fluid as fluid -import paddle.v2 as paddle def bow_net(data, diff --git a/fluid/text_classification/train.py b/fluid/text_classification/train.py index dc164671e785b758365885b98788fae71d5f8a87..9078f4788319dbf76677c86eef53445fa1e85c1a 100644 --- a/fluid/text_classification/train.py +++ b/fluid/text_classification/train.py @@ -1,10 +1,11 @@ +import os import sys import time import unittest import contextlib +import paddle import paddle.fluid as fluid -import paddle.v2 as paddle import utils from nets import bow_net @@ -53,8 +54,12 @@ def train(train_reader, exe = fluid.Executor(place) feeder = fluid.DataFeeder(feed_list=[data, label], place=place) + # For internal continuous evaluation + if "CE_MODE_X" in os.environ: + fluid.default_startup_program().random_seed = 110 exe.run(fluid.default_startup_program()) for pass_id in xrange(pass_num): + pass_start = time.time() data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 for data in train_reader(): avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(), @@ -73,6 +78,13 @@ def train(train_reader, epoch_model = save_dirname + "/" + "epoch" + str(pass_id) fluid.io.save_inference_model(epoch_model, ["words", "label"], acc, exe) + pass_end = time.time() + # For internal continuous evaluation + if "CE_MODE_X" in os.environ: + print("kpis train_acc %f" % avg_acc) + print("kpis train_cost %f" % avg_cost) + print("kpis train_duration %f" % (pass_end - pass_start)) + def train_net(): word_dict, train_reader, test_reader = utils.prepare_data( diff --git a/fluid/text_classification/utils.py b/fluid/text_classification/utils.py index fba14dde63d27ada07d8fcd69cacfb631559e613..3673946b6f39eade1811dfc4d81c99b0ef9400bb 100644 --- a/fluid/text_classification/utils.py +++ b/fluid/text_classification/utils.py @@ -1,9 +1,10 @@ +import os import sys import time import numpy as np +import paddle import paddle.fluid as fluid -import paddle.v2 as paddle def to_lodtensor(data, place): @@ -64,15 +65,22 @@ def prepare_data(data_type="imdb", raise RuntimeError("No such dataset") if data_type == "imdb": - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=buf_size), - batch_size=batch_size) + if "CE_MODE_X" in os.environ: + train_reader = paddle.batch( + paddle.dataset.imdb.train(word_dict), batch_size=batch_size) - test_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.test(word_dict), buf_size=buf_size), - batch_size=batch_size) + test_reader = paddle.batch( + paddle.dataset.imdb.test(word_dict), batch_size=batch_size) + else: + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.train(word_dict), buf_size=buf_size), + batch_size=batch_size) + + test_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.imdb.test(word_dict), buf_size=buf_size), + batch_size=batch_size) else: raise RuntimeError("no such dataset")