diff --git a/fluid/PaddleCV/deeplabv3+/.run_ce.sh b/fluid/PaddleCV/deeplabv3+/.run_ce.sh new file mode 100755 index 0000000000000000000000000000000000000000..540fb964ba94fd29dc28bb51342cdba839d433e7 --- /dev/null +++ b/fluid/PaddleCV/deeplabv3+/.run_ce.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +export MKL_NUM_THREADS=1 +export OMP_NUM_THREADS=1 + +DATASET_PATH=${HOME}/.cache/paddle/dataset/cityscape/ + +cudaid=${deeplabv3plus:=0} # use 0-th card as default +export CUDA_VISIBLE_DEVICES=$cudaid + +FLAGS_benchmark=true python train.py \ +--batch_size=2 \ +--train_crop_size=769 \ +--total_step=50 \ +--save_weights_path=output1 \ +--dataset_path=$DATASET_PATH \ +--enable_ce | python _ce.py + +cudaid=${deeplabv3plus_m:=0,1,2,3} # use 0,1,2,3 card as default +export CUDA_VISIBLE_DEVICES=$cudaid + +FLAGS_benchmark=true python train.py \ +--batch_size=2 \ +--train_crop_size=769 \ +--total_step=50 \ +--save_weights_path=output4 \ +--dataset_path=$DATASET_PATH \ +--enable_ce | python _ce.py diff --git a/fluid/PaddleCV/deeplabv3+/__init__.py b/fluid/PaddleCV/deeplabv3+/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/fluid/PaddleCV/deeplabv3+/_ce.py b/fluid/PaddleCV/deeplabv3+/_ce.py new file mode 100644 index 0000000000000000000000000000000000000000..b0127d6445213b9d3934220fa36e9eb44d3e04b4 --- /dev/null +++ b/fluid/PaddleCV/deeplabv3+/_ce.py @@ -0,0 +1,60 @@ +# this file is only used for continuous evaluation test! 
+ +import os +import sys +sys.path.append(os.environ['ceroot']) +from kpi import CostKpi +from kpi import DurationKpi + +each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.1, 0, actived=True) +train_loss_card1_kpi = CostKpi('train_loss_card1', 0.05, 0) +each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.1, 0, actived=True) +train_loss_card4_kpi = CostKpi('train_loss_card4', 0.05, 0) + +tracking_kpis = [ + each_pass_duration_card1_kpi, + train_loss_card1_kpi, + each_pass_duration_card4_kpi, + train_loss_card4_kpi, + ] + + +def parse_log(log): + ''' + This method should be implemented by model developers. + + The suggestion: + + each line in the log should be key, value, for example: + + " + train_cost\t1.0 + test_cost\t1.0 + train_cost\t1.0 + train_cost\t1.0 + train_acc\t1.2 + " + ''' + for line in log.split('\n'): + fs = line.strip().split('\t') + print(fs) + if len(fs) == 3 and fs[0] == 'kpis': + kpi_name = fs[1] + kpi_value = float(fs[2]) + yield kpi_name, kpi_value + + +def log_to_ce(log): + kpi_tracker = {} + for kpi in tracking_kpis: + kpi_tracker[kpi.name] = kpi + + for (kpi_name, kpi_value) in parse_log(log): + print(kpi_name, kpi_value) + kpi_tracker[kpi_name].add_record(kpi_value) + kpi_tracker[kpi_name].persist() + + +if __name__ == '__main__': + log = sys.stdin.read() + log_to_ce(log) diff --git a/fluid/PaddleCV/deeplabv3+/train.py b/fluid/PaddleCV/deeplabv3+/train.py old mode 100644 new mode 100755 index 817d53d173467f9146918ec9bb6b44141eb0ac3f..e009f76e0e16be9e4a5db532615cefac258fada1 --- a/fluid/PaddleCV/deeplabv3+/train.py +++ b/fluid/PaddleCV/deeplabv3+/train.py @@ -34,6 +34,10 @@ def add_arguments(): add_argument('parallel', bool, False, "using ParallelExecutor.") add_argument('use_gpu', bool, True, "Whether use GPU or CPU.") add_argument('num_classes', int, 19, "Number of classes.") + parser.add_argument( + '--enable_ce', + action='store_true', + help='If set, run the task with continuous evaluation 
logs.') def load_model(): @@ -51,7 +55,10 @@ def load_model(): else: if args.num_classes == 19: fluid.io.load_params( - exe, dirname=args.init_weights_path, main_program=tp) + exe, + dirname="", + filename=args.init_weights_path, + main_program=tp) else: fluid.io.load_vars( exe, dirname="", filename=args.init_weights_path, vars=myvars) @@ -84,6 +91,15 @@ def loss(logit, label): return loss, label_nignore +def get_cards(args): + if args.enable_ce: + cards = os.environ.get('CUDA_VISIBLE_DEVICES') + num = len(cards.split(",")) + return num + else: + return args.num_devices + + CityscapeDataset = reader.CityscapeDataset parser = argparse.ArgumentParser() @@ -99,6 +115,13 @@ deeplabv3p = models.deeplabv3p sp = fluid.Program() tp = fluid.Program() + +# only for ce +if args.enable_ce: + SEED = 102 + sp.random_seed = SEED + tp.random_seed = SEED + crop_size = args.train_crop_size batch_size = args.batch_size image_shape = [crop_size, crop_size] @@ -155,7 +178,13 @@ if args.parallel: batches = dataset.get_batch_generator(batch_size, total_step) +total_time = 0.0 +epoch_idx = 0 +train_loss = 0 + for i, imgs, labels, names in batches: + epoch_idx += 1 + begin_time = time.time() prev_start_time = time.time() if args.parallel: retv = exe_p.run(fetch_list=[pred.name, loss_mean.name], @@ -167,11 +196,21 @@ for i, imgs, labels, names in batches: 'label': labels}, fetch_list=[pred, loss_mean]) end_time = time.time() + total_time += end_time - begin_time if i % 100 == 0: print("Model is saved to", args.save_weights_path) save_model() print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}".format( i, np.mean(retv[1]), end_time - prev_start_time)) + # only for ce + train_loss = np.mean(retv[1]) + +if args.enable_ce: + gpu_num = get_cards(args) + print("kpis\teach_pass_duration_card%s\t%s" % + (gpu_num, total_time / epoch_idx)) + print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, train_loss)) + print("Training done. 
Model is saved to", args.save_weights_path) save_model() diff --git a/fluid/PaddleCV/image_classification/.run_ce.sh b/fluid/PaddleCV/image_classification/.run_ce.sh index 9ba9a4c2c6779694f0e87e12ca85b59afa33f1c0..cc0d894a634bc0add12fd83840990eacf77382cc 100755 --- a/fluid/PaddleCV/image_classification/.run_ce.sh +++ b/fluid/PaddleCV/image_classification/.run_ce.sh @@ -7,6 +7,7 @@ cudaid=${object_detection_cudaid:=0} export CUDA_VISIBLE_DEVICES=$cudaid python train.py --batch_size=${BATCH_SIZE} --num_epochs=5 --enable_ce=True --lr_strategy=cosine_decay | python _ce.py +BATCH_SIZE=224 cudaid=${object_detection_cudaid_m:=0, 1, 2, 3} export CUDA_VISIBLE_DEVICES=$cudaid python train.py --batch_size=${BATCH_SIZE} --num_epochs=5 --enable_ce=True --lr_strategy=cosine_decay | python _ce.py diff --git a/fluid/PaddleCV/image_classification/train.py b/fluid/PaddleCV/image_classification/train.py index ba3c94d129965684e17f692faf653ffa15984371..8e53699ed09d1b8e2e55eb4b35e7a6bb41294720 100644 --- a/fluid/PaddleCV/image_classification/train.py +++ b/fluid/PaddleCV/image_classification/train.py @@ -242,7 +242,7 @@ def train(args): device_num = subprocess.check_output(['nvidia-smi', '-L']).decode().count('\n') train_batch_size = args.batch_size / device_num - test_batch_size = 8 + test_batch_size = 16 if not args.enable_ce: train_reader = paddle.batch( reader.train(), batch_size=train_batch_size, drop_last=True) diff --git a/fluid/PaddleCV/object_detection/data_util.py b/fluid/PaddleCV/object_detection/data_util.py index ac022593119e0008c3f7f3858303cbf5bc717650..e7d6b2b43eee5048fb5d3d8397a3e88aa0f14b49 100644 --- a/fluid/PaddleCV/object_detection/data_util.py +++ b/fluid/PaddleCV/object_detection/data_util.py @@ -68,6 +68,7 @@ class GeneratorEnqueuer(object): try: task() except Exception: + traceback.print_exc() self._stop_event.set() break else: @@ -75,6 +76,7 @@ class GeneratorEnqueuer(object): try: task() except Exception: + traceback.print_exc() self._stop_event.set() break 
diff --git a/fluid/PaddleCV/object_detection/reader.py b/fluid/PaddleCV/object_detection/reader.py index 59da1b38fb2e9cce8bb99a2773e7fc222ee33bd8..6acc18594e5979308a7ba641002569b0867516a8 100644 --- a/fluid/PaddleCV/object_detection/reader.py +++ b/fluid/PaddleCV/object_detection/reader.py @@ -176,10 +176,17 @@ def coco(settings, file_list, mode, batch_size, shuffle): if mode == 'train' and shuffle: np.random.shuffle(images) batch_out = [] + if '2014' in file_list: + sub_dir = "train2014" if mode == "train" else "val2014" + elif '2017' in file_list: + sub_dir = "train2017" if mode == "train" else "val2017" + data_dir = os.path.join(settings.data_dir, sub_dir) for image in images: image_name = image['file_name'] - image_path = os.path.join(settings.data_dir, image_name) - + image_path = os.path.join(data_dir, image_name) + if not os.path.exists(image_path): + raise ValueError("%s is not exist, you should specify " + "data path correctly." % image_path) im = Image.open(image_path) if im.mode == 'L': im = im.convert('RGB') @@ -242,7 +249,9 @@ def pascalvoc(settings, file_list, mode, batch_size, shuffle): image_path, label_path = image.split() image_path = os.path.join(settings.data_dir, image_path) label_path = os.path.join(settings.data_dir, label_path) - + if not os.path.exists(image_path): + raise ValueError("%s is not exist, you should specify " + "data path correctly." % image_path) im = Image.open(image_path) if im.mode == 'L': im = im.convert('RGB') @@ -295,7 +304,6 @@ def train(settings, max_queue=24, enable_ce=False): file_list = os.path.join(settings.data_dir, file_list) - if 'coco' in settings.dataset: generator = coco(settings, file_list, "train", batch_size, shuffle) else: @@ -341,6 +349,9 @@ def test(settings, file_list, batch_size): def infer(settings, image_path): def reader(): + if not os.path.exists(image_path): + raise ValueError("%s is not exist, you should specify " + "data path correctly." 
% image_path) img = Image.open(image_path) if img.mode == 'L': img = im.convert('RGB') diff --git a/fluid/PaddleNLP/chinese_ner/infer.py b/fluid/PaddleNLP/chinese_ner/infer.py index e22832d38bc5308444201bd302798cf18cae7d99..a15fdb53d89f2f7845e6bb54aa32fe922bb64682 100644 --- a/fluid/PaddleNLP/chinese_ner/infer.py +++ b/fluid/PaddleNLP/chinese_ner/infer.py @@ -52,7 +52,7 @@ def parse_args(): def print_arguments(args): print('----------- Configuration Arguments -----------') - for arg, value in sorted(vars(args).iteritems()): + for arg, value in sorted(vars(args).items()): print('%s: %s' % (arg, value)) print('------------------------------------------------') @@ -61,6 +61,7 @@ def load_reverse_dict(dict_path): return dict((idx, line.strip().split("\t")[0]) for idx, line in enumerate(open(dict_path, "r").readlines())) + def to_lodtensor(data, place): seq_lens = [len(seq) for seq in data] cur_len = 0 @@ -76,7 +77,6 @@ def to_lodtensor(data, place): return res - def infer(args): word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1) mention = fluid.layers.data( @@ -108,8 +108,8 @@ def infer(args): profiler.reset_profiler() iters = 0 for data in test_data(): - word = to_lodtensor(map(lambda x: x[0], data), place) - mention = to_lodtensor(map(lambda x: x[1], data), place) + word = to_lodtensor(list(map(lambda x: x[0], data)), place) + mention = to_lodtensor(list(map(lambda x: x[1], data)), place) start = time.time() crf_decode = exe.run(inference_program, @@ -122,12 +122,12 @@ def infer(args): np_data = np.array(crf_decode[0]) word_count = 0 assert len(data) == len(lod_info) - 1 - for sen_index in xrange(len(data)): + for sen_index in range(len(data)): assert len(data[sen_index][0]) == lod_info[ sen_index + 1] - lod_info[sen_index] word_index = 0 - for tag_index in xrange(lod_info[sen_index], - lod_info[sen_index + 1]): + for tag_index in range(lod_info[sen_index], + lod_info[sen_index + 1]): word = str(data[sen_index][0][word_index]) gold_tag = 
label_reverse_dict[data[sen_index][2][ word_index]] diff --git a/fluid/PaddleNLP/chinese_ner/train.py b/fluid/PaddleNLP/chinese_ner/train.py index 7e59d2ed0793ae9499fc2a6618e762a9ac426800..7684f683e77b35be84b5753793f97308c7763cd8 100644 --- a/fluid/PaddleNLP/chinese_ner/train.py +++ b/fluid/PaddleNLP/chinese_ner/train.py @@ -65,7 +65,7 @@ def parse_args(): def print_arguments(args): print('----------- Configuration Arguments -----------') - for arg, value in sorted(vars(args).iteritems()): + for arg, value in sorted(vars(args).items()): print('%s: %s' % (arg, value)) print('------------------------------------------------') @@ -220,9 +220,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place, cur_fetch_list): chunk_evaluator.reset() for data in test_data(): - word = to_lodtensor(map(lambda x: x[0], data), place) - mention = to_lodtensor(map(lambda x: x[1], data), place) - target = to_lodtensor(map(lambda x: x[2], data), place) + word = to_lodtensor(list(map(lambda x: x[0], data)), place) + mention = to_lodtensor(list(map(lambda x: x[1], data)), place) + target = to_lodtensor(list(map(lambda x: x[2], data)), place) result_list = exe.run( inference_program, feed={"word": word, @@ -232,8 +232,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place, number_infer = np.array(result_list[0]) number_label = np.array(result_list[1]) number_correct = np.array(result_list[2]) - chunk_evaluator.update(number_infer[0], number_label[0], - number_correct[0]) + chunk_evaluator.update(number_infer[0].astype('int64'), + number_label[0].astype('int64'), + number_correct[0].astype('int64')) return chunk_evaluator.eval() @@ -241,9 +242,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place, cur_fetch_list): chunk_evaluator.reset() for data in test_data(): - word = to_lodtensor(map(lambda x: x[0], data), place) - mention = to_lodtensor(map(lambda x: x[1], data), place) - target = to_lodtensor(map(lambda x: x[2], data), place) + 
word = to_lodtensor(list(map(lambda x: x[0], data)), place) + mention = to_lodtensor(list(map(lambda x: x[1], data)), place) + target = to_lodtensor(list(map(lambda x: x[2], data)), place) result_list = test_exe.run( fetch_list=cur_fetch_list, feed={"word": word, @@ -252,8 +253,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place, number_infer = np.array(result_list[0]) number_label = np.array(result_list[1]) number_correct = np.array(result_list[2]) - chunk_evaluator.update(number_infer.sum(), - number_label.sum(), number_correct.sum()) + chunk_evaluator.update(number_infer.sum().astype('int64'), + number_label.sum().astype('int64'), + number_correct.sum().astype('int64')) return chunk_evaluator.eval() @@ -270,11 +272,6 @@ def main(args): crf_decode = fluid.layers.crf_decoding( input=feature_out, param_attr=fluid.ParamAttr(name='crfw')) - inference_program = fluid.default_main_program().clone(for_test=True) - - sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3) - sgd_optimizer.minimize(avg_cost) - (precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks) = fluid.layers.chunk_eval( input=crf_decode, @@ -282,6 +279,11 @@ def main(args): chunk_scheme="IOB", num_chunk_types=int(math.ceil((args.label_dict_len - 1) / 2.0))) + inference_program = fluid.default_main_program().clone(for_test=True) + + sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3) + sgd_optimizer.minimize(avg_cost) + chunk_evaluator = fluid.metrics.ChunkEvaluator() train_reader = paddle.batch( @@ -312,7 +314,7 @@ def main(args): test_exe = exe batch_id = 0 - for pass_id in xrange(args.num_passes): + for pass_id in range(args.num_passes): chunk_evaluator.reset() train_reader_iter = train_reader() start_time = time.time() @@ -326,9 +328,9 @@ def main(args): ], feed=feeder.feed(cur_batch)) chunk_evaluator.update( - np.array(nums_infer).sum(), - np.array(nums_label).sum(), - np.array(nums_correct).sum()) + 
np.array(nums_infer).sum().astype("int64"), + np.array(nums_label).sum().astype("int64"), + np.array(nums_correct).sum().astype("int64")) cost_list = np.array(cost) batch_id += 1 except StopIteration: diff --git a/fluid/PaddleNLP/deep_attention_matching_net/_ce.py b/fluid/PaddleNLP/deep_attention_matching_net/_ce.py index 0c38c0a3d1b0fc0a240a7bae928d9c07f8b95886..7ad30288074da3124c33fad6c96fd369a812c77c 100644 --- a/fluid/PaddleNLP/deep_attention_matching_net/_ce.py +++ b/fluid/PaddleNLP/deep_attention_matching_net/_ce.py @@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi #### NOTE kpi.py should shared in models in some way!!!! -train_cost_kpi = CostKpi('train_cost', 0.02, actived=True) -train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True) +train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True) +train_duration_kpi = DurationKpi('train_duration', 0.05, 0, actived=True) tracking_kpis = [ train_cost_kpi, diff --git a/fluid/PaddleNLP/machine_reading_comprehension/_ce.py b/fluid/PaddleNLP/machine_reading_comprehension/_ce.py index cff13c8722007987a3cd82f1298206248963e45a..a425fe951fb587749f31b18959917cdeed76a41d 100644 --- a/fluid/PaddleNLP/machine_reading_comprehension/_ce.py +++ b/fluid/PaddleNLP/machine_reading_comprehension/_ce.py @@ -3,6 +3,7 @@ import os import sys #sys.path.insert(0, os.environ['ceroot']) +sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi #### NOTE kpi.py should shared in models in some way!!!! 
diff --git a/fluid/PaddleNLP/machine_reading_comprehension/dataset.py b/fluid/PaddleNLP/machine_reading_comprehension/dataset.py index 3aaf87be9a7b0659fa9e79eb8329911cbea73c55..c732ce041c5e82ea5e1471ba422f5b056a7cba8f 100644 --- a/fluid/PaddleNLP/machine_reading_comprehension/dataset.py +++ b/fluid/PaddleNLP/machine_reading_comprehension/dataset.py @@ -23,6 +23,7 @@ import json import logging import numpy as np from collections import Counter +import io class BRCDataset(object): @@ -67,7 +68,7 @@ class BRCDataset(object): Args: data_path: the data file to load """ - with open(data_path) as fin: + with io.open(data_path, 'r', encoding='utf-8') as fin: data_set = [] for lidx, line in enumerate(fin): sample = json.loads(line.strip()) diff --git a/fluid/PaddleNLP/machine_reading_comprehension/run.py b/fluid/PaddleNLP/machine_reading_comprehension/run.py index dbe3a4b9a296fdaf089d55be3f0c9845422f0ce5..74561297f003faa4b3d871c0f327b65da63e81e7 100644 --- a/fluid/PaddleNLP/machine_reading_comprehension/run.py +++ b/fluid/PaddleNLP/machine_reading_comprehension/run.py @@ -22,6 +22,7 @@ import os import random import json import six +import multiprocessing import paddle import paddle.fluid as fluid diff --git a/fluid/PaddleNLP/neural_machine_translation/transformer/train.py b/fluid/PaddleNLP/neural_machine_translation/transformer/train.py index 0e9c18416f62c85e76dd060f1fad44073e5841fc..5fc98868aa6e36bc5d1c5c0ad7ab231cda0fd52d 100644 --- a/fluid/PaddleNLP/neural_machine_translation/transformer/train.py +++ b/fluid/PaddleNLP/neural_machine_translation/transformer/train.py @@ -469,7 +469,7 @@ def train_loop(exe, # For faster executor exec_strategy = fluid.ExecutionStrategy() exec_strategy.use_experimental_executor = True - # exec_strategy.num_iteration_per_drop_scope = 5 + exec_strategy.num_iteration_per_drop_scope = int(args.fetch_steps) build_strategy = fluid.BuildStrategy() # Since the token number differs among devices, customize gradient scale to # use token average cost 
among multi-devices. and the gradient scale is diff --git a/fluid/PaddleNLP/text_classification/train.py b/fluid/PaddleNLP/text_classification/train.py index 159266f3956b950afa200e9f53c9fdc6c36309aa..174636f06ec5fe07180347745f910166140e9eed 100644 --- a/fluid/PaddleNLP/text_classification/train.py +++ b/fluid/PaddleNLP/text_classification/train.py @@ -89,7 +89,7 @@ def train(train_reader, def train_net(): word_dict, train_reader, test_reader = utils.prepare_data( - "imdb", self_dict=False, batch_size=4, buf_size=50000) + "imdb", self_dict=False, batch_size=128, buf_size=50000) if sys.argv[1] == "bow": train( diff --git a/fluid/PaddleNLP/text_matching_on_quora/.run_ce.sh b/fluid/PaddleNLP/text_matching_on_quora/.run_ce.sh old mode 100644 new mode 100755 index eca247a40a3f680a6a59c4a183bfba006ced8d44..f1bb7febd3f2c572544612baf24be14c711108e3 --- a/fluid/PaddleNLP/text_matching_on_quora/.run_ce.sh +++ b/fluid/PaddleNLP/text_matching_on_quora/.run_ce.sh @@ -6,9 +6,9 @@ export OMP_NUM_THREADS=1 cudaid=${text_matching_on_quora:=0} # use 0-th card as default export CUDA_VISIBLE_DEVICES=$cudaid -FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py +FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce --epoch_num=5 | python _ce.py cudaid=${text_matching_on_quora_m:=0,1,2,3} # use 0,1,2,3 card as default export CUDA_VISIBLE_DEVICES=$cudaid -FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py +FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce --epoch_num=5 | python _ce.py diff --git a/fluid/PaddleNLP/text_matching_on_quora/_ce.py b/fluid/PaddleNLP/text_matching_on_quora/_ce.py index b38ad21a1e0eb7407f78d100a3cb3659f6c5d8d3..eadeb821da6f7049d1916a65a1ae4eb995c5cb6d 100644 --- a/fluid/PaddleNLP/text_matching_on_quora/_ce.py +++ 
b/fluid/PaddleNLP/text_matching_on_quora/_ce.py @@ -7,11 +7,11 @@ from kpi import CostKpi from kpi import DurationKpi -each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.05, 0, actived=True) -train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.2, 0) +each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.08, 0, actived=True) +train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.08, 0) train_avg_acc_card1_kpi = CostKpi('train_avg_acc_card1', 0.02, 0) -each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.05, 0, actived=True) -train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.2, 0) +each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.08, 0, actived=True) +train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.08, 0) train_avg_acc_card4_kpi = CostKpi('train_avg_acc_card4', 0.02, 0) tracking_kpis = [ diff --git a/fluid/PaddleNLP/text_matching_on_quora/train_and_evaluate.py b/fluid/PaddleNLP/text_matching_on_quora/train_and_evaluate.py index 714fa6f970d9f213efdc6b6e1799b244696fb20d..0f88c6b6ef13aec25e08527b7efabe8638a3af25 100755 --- a/fluid/PaddleNLP/text_matching_on_quora/train_and_evaluate.py +++ b/fluid/PaddleNLP/text_matching_on_quora/train_and_evaluate.py @@ -34,6 +34,7 @@ parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('--model_name', type=str, default='cdssmNet', help="Which model to train") parser.add_argument('--config', type=str, default='cdssm_base', help="The global config setting") parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.') +parser.add_argument('--epoch_num', type=int, help='Number of epoch') DATA_DIR = os.path.join(os.path.expanduser('~'), '.cache/paddle/dataset') @@ -241,6 +242,9 @@ def main(): args = parser.parse_args() global_config = configs.__dict__[args.config]() + if args.epoch_num is not None: + global_config.epoch_num = args.epoch_num + 
print("net_name: ", args.model_name) net = models.__dict__[args.model_name](global_config) diff --git a/fluid/PaddleRec/multiview_simnet/nets.py b/fluid/PaddleRec/multiview_simnet/nets.py index 41e366f55c80c5151102ed5e81a2746774fb3b4b..fed177844bdd247d163aee9e8625cd0ec74378b3 100644 --- a/fluid/PaddleRec/multiview_simnet/nets.py +++ b/fluid/PaddleRec/multiview_simnet/nets.py @@ -33,7 +33,7 @@ class CNNEncoder(object): """ cnn-encoder""" def __init__(self, - param_name="cnn.w", + param_name="cnn", win_size=3, ksize=128, act='tanh', @@ -51,13 +51,15 @@ class CNNEncoder(object): filter_size=self.win_size, act=self.act, pool_type=self.pool_type, - param_attr=str(self.param_name)) + param_attr=self.param_name + ".param", + bias_attr=self.param_name + ".bias") + class GrnnEncoder(object): """ grnn-encoder """ - def __init__(self, param_name="grnn.w", hidden_size=128): + def __init__(self, param_name="grnn", hidden_size=128): self.param_name = param_name self.hidden_size = hidden_size @@ -65,13 +67,15 @@ class GrnnEncoder(object): fc0 = nn.fc( input=emb, size=self.hidden_size * 3, - param_attr=str(str(self.param_name) + "_fc") - ) + param_attr=self.param_name + "_fc.w", + bias_attr=False) + gru_h = nn.dynamic_gru( input=fc0, size=self.hidden_size, is_reverse=False, - param_attr=str(self.param_name)) + param_attr=self.param_name + ".param", + bias_attr=self.param_name + ".bias") return nn.sequence_pool(input=gru_h, pool_type='max') @@ -139,17 +143,17 @@ class MultiviewSimnet(object): # lookup embedding for each slot q_embs = [ nn.embedding( - input=query, size=self.emb_shape, param_attr="emb.w") + input=query, size=self.emb_shape, param_attr="emb") for query in q_slots ] pt_embs = [ nn.embedding( - input=title, size=self.emb_shape, param_attr="emb.w") + input=title, size=self.emb_shape, param_attr="emb") for title in pt_slots ] nt_embs = [ nn.embedding( - input=title, size=self.emb_shape, param_attr="emb.w") + input=title, size=self.emb_shape, param_attr="emb") for title 
in nt_slots ] @@ -170,9 +174,9 @@ class MultiviewSimnet(object): nt_concat = nn.concat(nt_encodes) # projection of hidden layer - q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w') - pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w') - nt_hid = nn.fc(nt_concat, size=self.hidden_size, param_attr='t_fc.w') + q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w', bias_attr='q_fc.b') + pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b') + nt_hid = nn.fc(nt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b') # cosine of hidden layers cos_pos = nn.cos_sim(q_hid, pt_hid) @@ -213,12 +217,12 @@ class MultiviewSimnet(object): # lookup embedding for each slot q_embs = [ nn.embedding( - input=query, size=self.emb_shape, param_attr="emb.w") + input=query, size=self.emb_shape, param_attr="emb") for query in q_slots ] pt_embs = [ nn.embedding( - input=title, size=self.emb_shape, param_attr="emb.w") + input=title, size=self.emb_shape, param_attr="emb") for title in pt_slots ] # encode each embedding field with encoder @@ -232,8 +236,8 @@ class MultiviewSimnet(object): q_concat = nn.concat(q_encodes) pt_concat = nn.concat(pt_encodes) # projection of hidden layer - q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w') - pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w') + q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w', bias_attr='q_fc.b') + pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b') # cosine of hidden layers cos = nn.cos_sim(q_hid, pt_hid) return cos