diff --git a/dygraph/sentiment/.run_ce.sh b/dygraph/sentiment/.run_ce.sh
new file mode 100755
index 0000000000000000000000000000000000000000..482c0b27ff9f2936782c105ede0bd12136e13cab
--- /dev/null
+++ b/dygraph/sentiment/.run_ce.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# This file is only used for continuous evaluation.
+# dygraph single card
+export FLAGS_cudnn_deterministic=True
+export CUDA_VISIBLE_DEVICES=0
+python main.py --ce --epoch 1 --random_seed 33 --validation_steps 600 | python _ce.py
+
diff --git a/dygraph/sentiment/_ce.py b/dygraph/sentiment/_ce.py
new file mode 100644
index 0000000000000000000000000000000000000000..3dc1499ae66fb8328c51b6a64bbdb23ae29d1b97
--- /dev/null
+++ b/dygraph/sentiment/_ce.py
@@ -0,0 +1,58 @@
+#### This file is only used for continuous evaluation tests!
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+sys.path.append(os.environ['ceroot'])
+from kpi import CostKpi, DurationKpi, AccKpi
+
+#### NOTE: kpi.py should be shared across models in some way!
+
+train_acc = AccKpi('train_acc', 0.1, 0, actived=True, desc="train acc")
+train_loss = CostKpi('train_loss', 0.1, 0, actived=True, desc="train loss")
+tracking_kpis = [train_acc, train_loss]
+
+
+def parse_log(log):
+    '''
+    This method should be implemented by model developers.
+
+    Suggestion: each line in the log should be a tab-separated
+    key/value pair, for example:
+
+    "
+    train_cost\t1.0
+    test_cost\t1.0
+    train_cost\t1.0
+    train_cost\t1.0
+    train_acc\t1.2
+    "
+    '''
+    for line in log.split('\n'):
+        fs = line.strip().split('\t')
+        print(fs)
+        if len(fs) == 3 and fs[0] == 'kpis':
+            print("-----%s" % fs)
+            kpi_name = fs[1]
+            kpi_value = float(fs[2])
+            yield kpi_name, kpi_value
+
+
+def log_to_ce(log):
+    kpi_tracker = {}
+    for kpi in tracking_kpis:
+        kpi_tracker[kpi.name] = kpi
+
+    for (kpi_name, kpi_value) in parse_log(log):
+        print(kpi_name, kpi_value)
+        kpi_tracker[kpi_name].add_record(kpi_value)
+        kpi_tracker[kpi_name].persist()
+
+
+if __name__ == '__main__':
+    log = sys.stdin.read()
+    print("*****")
+    print(log)
+    print("****")
+    log_to_ce(log)
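
Note: the CE hook is a plain-text protocol. main.py writes tab-separated lines of the form kpis\t<name>\t<value> to stdout, .run_ce.sh pipes that output into _ce.py, and parse_log filters out everything else. A minimal, self-contained sketch of that contract (the sample log and its values are illustrative only):

    # Sketch of the KPI log contract consumed by _ce.py: only lines of
    # the form "kpis\t<name>\t<value>" are treated as KPI records.
    def parse_log(log):
        for line in log.split('\n'):
            fs = line.strip().split('\t')
            if len(fs) == 3 and fs[0] == 'kpis':
                yield fs[1], float(fs[2])

    sample_log = "step 600, avg loss 0.234\nkpis\ttrain_acc\t0.912\nkpis\ttrain_loss\t0.234"
    for name, value in parse_log(sample_log):
        print(name, value)
    # train_acc 0.912
    # train_loss 0.234
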
diff --git a/dygraph/sentiment/main.py b/dygraph/sentiment/main.py
index 5db20c34f13a6fb9fcf748bf0f72862daa411edf..279b34775648de4f6ae5481ca42313f909a261ed 100644
--- a/dygraph/sentiment/main.py
+++ b/dygraph/sentiment/main.py
@@ -52,12 +52,13 @@ data_g.add_arg("batch_size", int, 16,
 data_g.add_arg("random_seed", int, 0, "Random seed.")
 
 run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
-run_type_g.add_arg("use_cuda", bool, False, "If set, use GPU for training.")
+run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
 run_type_g.add_arg("do_train", bool, True, "Whether to perform training.")
 run_type_g.add_arg("do_val", bool, True, "Whether to perform evaluation.")
 run_type_g.add_arg("do_infer", bool, False, "Whether to perform inference.")
 run_type_g.add_arg("profile_steps", int, 15000,
                    "The steps interval to record the performance.")
+parser.add_argument("--ce", action="store_true", help="Run continuous evaluation.")
 
 args = parser.parse_args()
 
@@ -81,8 +82,21 @@ def profile_context(profile=True):
         yield
 
 
+if args.ce:
+    print("ce mode")
+    seed = 90
+    np.random.seed(seed)
+    fluid.default_startup_program().random_seed = seed
+    fluid.default_main_program().random_seed = seed
+
 def train():
     with fluid.dygraph.guard(place):
+        if args.ce:
+            print("ce mode")
+            seed = 90
+            np.random.seed(seed)
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
         processor = reader.SentaProcessor(
             data_dir=args.data_dir,
             vocab_path=args.vocab_path,
@@ -92,19 +106,31 @@ def train():
         num_train_examples = processor.get_num_examples(phase="train")
 
         max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
-
-        train_data_generator = processor.data_generator(
-            batch_size=args.batch_size,
-            phase='train',
-            epoch=args.epoch,
-            shuffle=True)
-
-        eval_data_generator = processor.data_generator(
-            batch_size=args.batch_size,
-            phase='dev',
-            epoch=args.epoch,
-            shuffle=False)
-
+
+        if not args.ce:
+            train_data_generator = processor.data_generator(
+                batch_size=args.batch_size,
+                phase='train',
+                epoch=args.epoch,
+                shuffle=True)
+
+            eval_data_generator = processor.data_generator(
+                batch_size=args.batch_size,
+                phase='dev',
+                epoch=args.epoch,
+                shuffle=False)
+        else:
+            train_data_generator = processor.data_generator(
+                batch_size=args.batch_size,
+                phase='train',
+                epoch=args.epoch,
+                shuffle=False)
+
+            eval_data_generator = processor.data_generator(
+                batch_size=args.batch_size,
+                phase='dev',
+                epoch=args.epoch,
+                shuffle=False)
 
         cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                            args.padding_size)
@@ -137,7 +163,6 @@ def train():
                     cnn_net.train()
                     avg_cost, prediction, acc = cnn_net(doc, label)
                     avg_cost.backward()
-
                     np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                     word_num = np.sum(np_mask)
                     sgd_optimizer.minimize(avg_cost)
@@ -200,14 +225,16 @@ def train():
                               / np.sum(total_eval_num_seqs),
                               eval_steps / used_time))
                         time_begin = time.time()
+                        if args.ce:
+                            print("kpis\ttrain_loss\t%0.3f" % (np.sum(total_eval_cost) / np.sum(total_eval_num_seqs)))
+                            print("kpis\ttrain_acc\t%0.3f" % (np.sum(total_eval_acc) / np.sum(total_eval_num_seqs)))
 
                     if steps % args.save_steps == 0:
                         save_path = "save_dir_" + str(steps)
                         print('save model to: ' + save_path)
                         fluid.dygraph.save_persistables(cnn_net.state_dict(),
                                                         save_path)
 
-
-        if enable_profile:
+        if enable_profile:
             print('save profile result into /tmp/profile_file')
             return
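
Note: a CE run is made deterministic in three places: a fixed seed (90) for NumPy and both fluid programs, shuffle=False for the training reader, and FLAGS_cudnn_deterministic=True in .run_ce.sh. The if/else around the two data_generator calls above differs only in the train-phase shuffle flag; an equivalent, more compact form of that selection (same names as in the diff, shown as a sketch rather than a drop-in patch):

    # Equivalent generator selection inside train(): only the train-phase
    # shuffle flag depends on --ce; eval data is never shuffled.
    train_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        phase='train',
        epoch=args.epoch,
        shuffle=not args.ce)

    eval_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        phase='dev',
        epoch=args.epoch,
        shuffle=False)
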
diff --git a/dygraph/sentiment/nets.py b/dygraph/sentiment/nets.py
index e887bf27c7b9a04cfa84a48c88489461fd5cfc46..4c64e3545ea6906d256560bd214dc70dbf5b7dbb 100644
--- a/dygraph/sentiment/nets.py
+++ b/dygraph/sentiment/nets.py
@@ -76,7 +76,6 @@ class CNN(fluid.dygraph.Layer):
         emb = fluid.layers.reshape(
             emb, shape=[-1, 1, self.seq_len, self.hid_dim])
         conv_3 = self._simple_conv_pool_1(emb)
-
         fc_1 = self._fc1(conv_3)
         prediction = self._fc_prediction(fc_1)
 
diff --git a/dygraph/sentiment/reader.py b/dygraph/sentiment/reader.py
index db1f27183e00d972d5d0cebbb5e333d069b3cb79..4093b3cfb88446d9b45858e76400d82ca14122cd 100644
--- a/dygraph/sentiment/reader.py
+++ b/dygraph/sentiment/reader.py
@@ -25,13 +25,13 @@ class SentaProcessor(object):
         self.num_examples = {"train": -1, "dev": -1, "infer": -1}
         np.random.seed(random_seed)
 
-    def get_train_examples(self, data_dir, epoch):
+    def get_train_examples(self, data_dir, epoch, shuffle):
         return data_reader((self.data_dir + "/train.tsv"), self.vocab,
-                           self.num_examples, "train", epoch)
+                           self.num_examples, "train", epoch, shuffle)
 
-    def get_dev_examples(self, data_dir, epoch):
+    def get_dev_examples(self, data_dir, epoch, shuffle):
         return data_reader((self.data_dir + "/dev.tsv"), self.vocab,
-                           self.num_examples, "dev", epoch)
+                           self.num_examples, "dev", epoch, shuffle)
 
     def get_test_examples(self, data_dir, epoch):
         return data_reader((self.data_dir + "/test.tsv"), self.vocab,
@@ -52,12 +52,12 @@ class SentaProcessor(object):
     def data_generator(self, batch_size, phase='train', epoch=1, shuffle=True):
         if phase == "train":
             return paddle.batch(
-                self.get_train_examples(self.data_dir, epoch),
+                self.get_train_examples(self.data_dir, epoch, shuffle),
                 batch_size,
                 drop_last=True)
         elif phase == "dev":
             return paddle.batch(
-                self.get_dev_examples(self.data_dir, epoch),
+                self.get_dev_examples(self.data_dir, epoch, shuffle),
                 batch_size,
                 drop_last=True)
         elif phase == "infer":
diff --git a/dygraph/sentiment/utils.py b/dygraph/sentiment/utils.py
index 20d80dc505205abe7d641d29c74a45ff326ffcad..555ee8a3a96aee9dec15c41d8ae08274e9bce7aa 100644
--- a/dygraph/sentiment/utils.py
+++ b/dygraph/sentiment/utils.py
@@ -38,7 +38,7 @@ class ArgumentGroup(object):
             **kwargs)
 
 
-def data_reader(file_path, word_dict, num_examples, phrase, epoch):
+def data_reader(file_path, word_dict, num_examples, phrase, epoch, shuffle=False):
     unk_id = len(word_dict)
     all_data = []
     with io.open(file_path, "r", encoding='utf8') as fin:
@@ -56,8 +56,9 @@ def data_reader(file_path, word_dict, num_examples, phrase, epoch):
             ]
             all_data.append((wids, label))
 
-    if phrase == "train":
-        random.shuffle(all_data)
+    if shuffle:
+        if phrase == "train":
+            random.shuffle(all_data)
 
     num_examples[phrase] = len(all_data)
 
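
Note: the reader.py and utils.py changes move the shuffle decision from data_reader, which previously always shuffled the train split, up to the callers, and the new parameter defaults to shuffle=False; any code calling data_reader directly and relying on the old implicit shuffling now has to opt in. A stripped-down, runnable sketch of the new control flow (toy in-memory data in place of the TSV files):

    import random

    # Stripped-down data_reader mirroring the diff: shuffle only when the
    # caller opts in, and even then only for the train phrase.
    def data_reader(all_data, phrase, shuffle=False):
        if shuffle:
            if phrase == "train":
                random.shuffle(all_data)

        def reader():
            for wids, label in all_data:
                yield wids, label

        return reader

    data = [([1, 2, 3], 0), ([4, 5], 1), ([6, 7, 8], 0)]
    train_reader = data_reader(list(data), "train", shuffle=True)  # shuffled
    dev_reader = data_reader(list(data), "dev", shuffle=True)      # unchanged: not train
    print(list(dev_reader()))
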