From 7d7ab3afc01a85ce09078175582df301d004854d Mon Sep 17 00:00:00 2001
From: FrostML <380185688@qq.com>
Date: Tue, 26 May 2020 02:13:11 +0000
Subject: [PATCH] align the behavior of simnet with static graph, test=develop

---
 dygraph/similarity_net/nets/gru.py           |  32 ++---
 dygraph/similarity_net/nets/lstm.py          |  33 +++--
 dygraph/similarity_net/nets/paddle_layers.py |  33 +++++
 dygraph/similarity_net/reader.py             |  48 ++++----
 dygraph/similarity_net/run.sh                |   4 +-
 dygraph/similarity_net/run_classifier.py     |  25 ----
 dygraph/similarity_net/utils.py              | 120 ++++++++++++++-----
 7 files changed, 182 insertions(+), 113 deletions(-)

diff --git a/dygraph/similarity_net/nets/gru.py b/dygraph/similarity_net/nets/gru.py
index b505a3a0..7b6e70c7 100644
--- a/dygraph/similarity_net/nets/gru.py
+++ b/dygraph/similarity_net/nets/gru.py
@@ -21,6 +21,8 @@ from paddle.fluid.dygraph.nn import Linear
 from paddle.fluid.dygraph import Layer
 from paddle import fluid
 import numpy as np
+from utils import seq_length
+
 
 class GRU(Layer):
     """
@@ -37,13 +39,16 @@ class GRU(Layer):
         self.emb_dim = conf_dict["net"]["emb_dim"]
         self.gru_dim = conf_dict["net"]["gru_dim"]
         self.hidden_dim = conf_dict["net"]["hidden_dim"]
-        self.emb_layer = layers.EmbeddingLayer(self.dict_size, self.emb_dim, "emb").ops()
-
+        self.emb_layer = layers.EmbeddingLayer(self.dict_size, self.emb_dim,
+                                               "emb").ops()
+
         self.gru_layer = layers.DynamicGRULayer(self.gru_dim, "gru").ops()
         self.fc_layer = layers.FCLayer(self.hidden_dim, None, "fc").ops()
-        self.proj_layer = Linear(input_dim = self.hidden_dim, output_dim=self.gru_dim*3)
+        self.proj_layer = Linear(
+            input_dim=self.hidden_dim, output_dim=self.gru_dim * 3)
         self.softmax_layer = layers.FCLayer(2, "softmax", "cos_sim").ops()
-        self.seq_len=conf_dict["seq_len"]
+        self.last_layer = layers.ExtractLastLayer()
+        self.seq_len = conf_dict["seq_len"]
 
     def forward(self, left, right):
         """
@@ -60,18 +65,15 @@ class GRU(Layer):
         h_0 = to_variable(h_0)
         left_gru = self.gru_layer(left_emb, h_0=h_0)
         right_gru = self.gru_layer(right_emb, h_0=h_0)
-        left_emb = fluid.layers.reduce_max(left_gru, dim=1)
-        right_emb = fluid.layers.reduce_max(right_gru, dim=1)
-        left_emb = fluid.layers.reshape(
-            left_emb, shape=[-1, self.seq_len, self.hidden_dim])
-        right_emb = fluid.layers.reshape(
-            right_emb, shape=[-1, self.seq_len, self.hidden_dim])
-        left_emb = fluid.layers.reduce_sum(left_emb, dim=1)
-        right_emb = fluid.layers.reduce_sum(right_emb, dim=1)
+        # Get sequence length before padding
+        left_len = seq_length(left)
+        left_len.stop_gradient = True
+        right_len = seq_length(right)
+        right_len.stop_gradient = True
+        # Extract last step
+        left_last = self.last_layer.ops(left_gru, left_len)
+        right_last = self.last_layer.ops(right_gru, right_len)
 
-        left_last = fluid.layers.tanh(left_emb)
-        right_last = fluid.layers.tanh(right_emb)
-
         if self.task_mode == "pairwise":
             left_fc = self.fc_layer(left_last)
             right_fc = self.fc_layer(right_last)
diff --git a/dygraph/similarity_net/nets/lstm.py b/dygraph/similarity_net/nets/lstm.py
index 9f53928a..a77f2d8a 100644
--- a/dygraph/similarity_net/nets/lstm.py
+++ b/dygraph/similarity_net/nets/lstm.py
@@ -17,6 +17,8 @@ lstm class
 import paddle_layers as layers
 from paddle.fluid.dygraph import Layer, Linear
 from paddle import fluid
+from utils import seq_length
+
 
 class LSTM(Layer):
     """
@@ -27,20 +29,22 @@ class LSTM(Layer):
         """
         initialize
         """
-        super(LSTM,self).__init__()
+        super(LSTM, self).__init__()
         self.dict_size = conf_dict["dict_size"]
         self.task_mode = conf_dict["task_mode"]
         self.emb_dim = conf_dict["net"]["emb_dim"]
         self.lstm_dim = conf_dict["net"]["lstm_dim"]
         self.hidden_dim = conf_dict["net"]["hidden_dim"]
-        self.emb_layer = layers.EmbeddingLayer(self.dict_size, self.emb_dim, "emb").ops()
+        self.emb_layer = layers.EmbeddingLayer(self.dict_size, self.emb_dim,
+                                               "emb").ops()
         self.lstm_layer = layers.DynamicLSTMLayer(self.lstm_dim, "lstm").ops()
         self.fc_layer = layers.FCLayer(self.hidden_dim, None, "fc").ops()
         self.softmax_layer = layers.FCLayer(2, "softmax", "cos_sim").ops()
-        self.proj_layer = Linear(input_dim = self.hidden_dim, output_dim=self.lstm_dim*4)
+        self.proj_layer = Linear(
+            input_dim=self.hidden_dim, output_dim=self.lstm_dim * 4)
+        self.last_layer = layers.ExtractLastLayer()
         self.seq_len = conf_dict["seq_len"]
-
     def forward(self, left, right):
         """
         Forward network
@@ -53,20 +57,15 @@ class LSTM(Layer):
         right_proj = self.proj_layer(right_emb)
         left_lstm, _ = self.lstm_layer(left_proj)
         right_lstm, _ = self.lstm_layer(right_proj)
-
-        left_emb = fluid.layers.reduce_max(left_lstm, dim=1)
-        right_emb = fluid.layers.reduce_max(right_lstm, dim=1)
-        left_emb = fluid.layers.reshape(
-            left_emb, shape=[-1, self.seq_len, self.hidden_dim])
-        right_emb = fluid.layers.reshape(
-            right_emb, shape=[-1, self.seq_len, self.hidden_dim])
-        left_emb = fluid.layers.reduce_sum(left_emb, dim=1)
-        right_emb = fluid.layers.reduce_sum(right_emb, dim=1)
-
-        left_last = fluid.layers.tanh(left_emb)
-        right_last = fluid.layers.tanh(right_emb)
+        # Get sequence length before padding
+        left_len = seq_length(left)
+        left_len.stop_gradient = True
+        right_len = seq_length(right)
+        right_len.stop_gradient = True
+        # Extract last step
+        left_last = self.last_layer.ops(left_lstm, left_len)
+        right_last = self.last_layer.ops(right_lstm, right_len)
 
-        # matching layer
         if self.task_mode == "pairwise":
             left_fc = self.fc_layer(left_last)
diff --git a/dygraph/similarity_net/nets/paddle_layers.py b/dygraph/similarity_net/nets/paddle_layers.py
index d0f5c0bf..ffbee94d 100644
--- a/dygraph/similarity_net/nets/paddle_layers.py
+++ b/dygraph/similarity_net/nets/paddle_layers.py
@@ -1051,3 +1051,36 @@ class BasicGRUUnit(Layer):
         new_hidden = u * pre_hidden + (1 - u) * c
 
         return new_hidden
+
+
+class ExtractLastLayer(object):
+    """
+    a layer class: get the last step layer
+    """
+
+    def __init__(self):
+        """
+        init function
+        """
+        pass
+
+    def ops(self, input_hidden, seq_length=None):
+        """
+        operation
+        """
+        if seq_length is not None:
+            output = input_hidden
+            output_shape = output.shape
+            batch_size = output_shape[0]
+            max_length = output_shape[1]
+            emb_size = output_shape[2]
+            index = fluid.layers.range(0, batch_size, 1,
+                                       'int32') * max_length + (seq_length - 1)
+            flat = fluid.layers.reshape(output, [-1, emb_size])
+            return fluid.layers.gather(flat, index)
+        else:
+            # gather() requires a tensor index, so build one for the final step
+            output = fluid.layers.transpose(input_hidden, [1, 0, 2])
+            last_index = fluid.layers.fill_constant(
+                shape=[1], dtype='int32', value=output.shape[0] - 1)
+            return fluid.layers.squeeze(
+                fluid.layers.gather(output, last_index), axes=[0])
diff --git a/dygraph/similarity_net/reader.py b/dygraph/similarity_net/reader.py
index cda39b8e..cac48085 100644
--- a/dygraph/similarity_net/reader.py
+++ b/dygraph/similarity_net/reader.py
@@ -29,15 +29,14 @@ class SimNetProcessor(object):
         self.test_label = np.array([])
         self.seq_len = args.seq_len
-    
+
     def padding_text(self, x):
         if len(x) < self.seq_len:
-            x += [0]*(self.seq_len-len(x))
+            x += [0] * (self.seq_len - len(x))
         if len(x) > self.seq_len:
-            x = x[0:self.seq_len] 
+            x = x[0:self.seq_len]
         return x
-
     def get_reader(self, mode, epoch=0):
         """
         Get Reader
@@ -48,8 +47,8 @@ class SimNetProcessor(object):
             Reader with Pairwise
             """
             if mode == "valid":
-                with io.open(self.args.valid_data_dir, "r",
-                             encoding="utf8") as file:
+                with io.open(
+                        self.args.valid_data_dir, "r", encoding="utf8") as file:
                     for line in file:
                         query, title, label = line.strip().split("\t")
                         if len(query) == 0 or len(title) == 0 or len(
@@ -76,7 +75,8 @@ class SimNetProcessor(object):
                         yield [query, title]
 
             elif mode == "test":
-                with io.open(self.args.test_data_dir, "r", encoding="utf8") as file:
+                with io.open(
+                        self.args.test_data_dir, "r", encoding="utf8") as file:
                     for line in file:
                         query, title, label = line.strip().split("\t")
                         if len(query) == 0 or len(title) == 0 or len(
@@ -104,34 +104,38 @@ class SimNetProcessor(object):
                        yield [query, title]
             else:
                 for idx in range(epoch):
-                    with io.open(self.args.train_data_dir, "r",
-                                 encoding="utf8") as file:
+                    with io.open(
+                            self.args.train_data_dir, "r",
+                            encoding="utf8") as file:
                         for line in file:
-                            query, pos_title, neg_title = line.strip().split("\t")
+                            query, pos_title, neg_title = line.strip().split(
+                                "\t")
                             if len(query) == 0 or len(pos_title) == 0 or len(
                                     neg_title) == 0:
                                 logging.warning(
-                                    "line not match format in test file")
+                                    "line not match format in train file")
                                 continue
                             query = [
                                 self.vocab[word] for word in query.split(" ")
                                 if word in self.vocab
                             ]
                             pos_title = [
-                                self.vocab[word] for word in pos_title.split(" ")
+                                self.vocab[word]
+                                for word in pos_title.split(" ")
                                 if word in self.vocab
                             ]
                             neg_title = [
-                                self.vocab[word] for word in neg_title.split(" ")
+                                self.vocab[word]
+                                for word in neg_title.split(" ")
                                 if word in self.vocab
                             ]
                             if len(query) == 0:
                                 query = [0]
-                            if len(pos_title) == 0: 
+                            if len(pos_title) == 0:
                                 pos_title = [0]
                             if len(neg_title) == 0:
                                 neg_title = [0]
-
+
                             query = self.padding_text(query)
                             pos_title = self.padding_text(pos_title)
                             neg_title = self.padding_text(neg_title)
@@ -143,8 +147,8 @@ class SimNetProcessor(object):
            Reader with Pointwise
            """
            if mode == "valid":
-                with io.open(self.args.valid_data_dir, "r",
-                             encoding="utf8") as file:
+                with io.open(
+                        self.args.valid_data_dir, "r", encoding="utf8") as file:
                    for line in file:
                        query, title, label = line.strip().split("\t")
                        if len(query) == 0 or len(title) == 0 or len(
@@ -165,13 +169,14 @@ class SimNetProcessor(object):
                            query = [0]
                        if len(title) == 0:
                            title = [0]
-                        
+
                        query = self.padding_text(query)
                        title = self.padding_text(title)
 
                        yield [query, title]
            elif mode == "test":
-                with io.open(self.args.test_data_dir, "r", encoding="utf8") as file:
+                with io.open(
+                        self.args.test_data_dir, "r", encoding="utf8") as file:
                    for line in file:
                        query, title, label = line.strip().split("\t")
                        if len(query) == 0 or len(title) == 0 or len(
@@ -199,8 +204,9 @@ class SimNetProcessor(object):
                        yield [query, title]
            else:
                for idx in range(epoch):
-                    with io.open(self.args.train_data_dir, "r",
-                                 encoding="utf8") as file:
+                    with io.open(
+                            self.args.train_data_dir, "r",
+                            encoding="utf8") as file:
                        for line in file:
                            query, title, label = line.strip().split("\t")
                            if len(query) == 0 or len(title) == 0 or len(
diff --git a/dygraph/similarity_net/run.sh b/dygraph/similarity_net/run.sh
index 657505a5..ab162955 100644
--- a/dygraph/similarity_net/run.sh
+++ b/dygraph/similarity_net/run.sh
@@ -48,7 +48,7 @@ train() {
 evaluate() {
     python run_classifier.py \
         --task_name ${TASK_NAME} \
-        --use_cuda false \
+        --use_cuda False \
         --do_test True \
         --verbose_result True \
         --batch_size 128 \
@@ -65,7 +65,7 @@ evaluate() {
 infer() {
     python run_classifier.py \
         --task_name ${TASK_NAME} \
-        --use_cuda false \
+        --use_cuda False \
         --do_infer True \
         --batch_size 128 \
         --infer_data_dir ${INFER_DATA_PATH} \
diff --git a/dygraph/similarity_net/run_classifier.py b/dygraph/similarity_net/run_classifier.py
index ff464e43..fa1aa803 100644
--- a/dygraph/similarity_net/run_classifier.py
+++ b/dygraph/similarity_net/run_classifier.py
@@ -161,10 +161,6 @@ def train(conf_dict, args):
 
         if args.task_mode == "pairwise":
             for left, pos_right, neg_right in train_loader():
-
-                left = fluid.layers.reshape(left, shape=[-1, 1])
-                pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
-                neg_right = fluid.layers.reshape(neg_right, shape=[-1, 1])
                 net.train()
                 global_step += 1
                 left_feat, pos_score = net(left, pos_right)
@@ -178,9 +174,6 @@ def train(conf_dict, args):
 
                 if args.do_valid and global_step % args.validation_steps == 0:
                     for left, pos_right in valid_loader():
-                        left = fluid.layers.reshape(left, shape=[-1, 1])
-                        pos_right = fluid.layers.reshape(
-                            pos_right, shape=[-1, 1])
                         net.eval()
                         left_feat, pos_score = net(left, pos_right)
                         pred = pos_score
@@ -212,9 +205,6 @@ def train(conf_dict, args):
                     logging.info("saving infer model in %s" % model_path)
         else:
             for left, right, label in train_loader():
-                left = fluid.layers.reshape(left, shape=[-1, 1])
-                right = fluid.layers.reshape(right, shape=[-1, 1])
-                label = fluid.layers.reshape(label, shape=[-1, 1])
                 net.train()
                 global_step += 1
                 left_feat, pred = net(left, right)
@@ -226,8 +216,6 @@ def train(conf_dict, args):
 
                 if args.do_valid and global_step % args.validation_steps == 0:
                     for left, right in valid_loader():
-                        left = fluid.layers.reshape(left, shape=[-1, 1])
-                        right = fluid.layers.reshape(right, shape=[-1, 1])
                         net.eval()
                         left_feat, pred = net(left, right)
                         pred_list += list(pred.numpy())
@@ -296,11 +284,7 @@ def train(conf_dict, args):
                                                       place)
             pred_list = []
             for left, pos_right in test_loader():
-                left = fluid.layers.reshape(left, shape=[-1, 1])
-                pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
                 net.eval()
-                left = fluid.layers.reshape(left, shape=[-1, 1])
-                pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
                 left_feat, pos_score = net(left, pos_right)
                 pred = pos_score
                 pred_list += list(pred.numpy())
@@ -351,9 +335,6 @@ def test(conf_dict, args):
             "predictions.txt", "w", encoding="utf8") as predictions_file:
         if args.task_mode == "pairwise":
             for left, pos_right in test_loader():
-                left = fluid.layers.reshape(left, shape=[-1, 1])
-                pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
-
                 left_feat, pos_score = net(left, pos_right)
                 pred = pos_score
 
@@ -365,8 +346,6 @@ def test(conf_dict, args):
 
         else:
             for left, right in test_loader():
-                left = fluid.layers.reshape(left, shape=[-1, 1])
-                right = fluid.layers.reshape(right, shape=[-1, 1])
                 left_feat, pred = net(left, right)
 
                 pred_list += list(
@@ -433,8 +412,6 @@ def infer(conf_dict, args):
     pred_list = []
     if args.task_mode == "pairwise":
         for left, pos_right in infer_loader():
-            left = fluid.layers.reshape(left, shape=[-1, 1])
-            pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
             left_feat, pos_score = net(left, pos_right)
             pred = pos_score
 
@@ -443,8 +420,6 @@ def infer(conf_dict, args):
 
     else:
         for left, right in infer_loader():
-            left = fluid.layers.reshape(left, shape=[-1, 1])
-            pos_right = fluid.layers.reshape(right, shape=[-1, 1])
             left_feat, pred = net(left, right)
             pred_list += map(lambda item: str(np.argmax(item)),
                              pred.numpy())
diff --git a/dygraph/similarity_net/utils.py b/dygraph/similarity_net/utils.py
index 77b122f7..5fc6edee 100644
--- a/dygraph/similarity_net/utils.py
+++ b/dygraph/similarity_net/utils.py
@@ -33,6 +33,7 @@ from functools import partial
 ******functions for file processing******
 """
 
+
 def load_vocab(file_path):
     """
     load the given vocabulary
@@ -59,8 +60,11 @@ def get_result_file(args):
     """
     with io.open(args.test_data_dir, "r", encoding="utf8") as test_file:
-        with io.open("predictions.txt", "r", encoding="utf8") as predictions_file:
-            with io.open(args.test_result_path, "w", encoding="utf8") as test_result_file:
+        with io.open(
+                "predictions.txt", "r", encoding="utf8") as predictions_file:
+            with io.open(
+                    args.test_result_path, "w",
+                    encoding="utf8") as test_result_file:
                 test_datas = [line.strip("\n") for line in test_file]
                 predictions = [line.strip("\n") for line in predictions_file]
                 for test_data, prediction in zip(test_datas, predictions):
@@ -168,52 +172,82 @@ class ArgumentGroup(object):
             help=help + ' Default: %(default)s.',
             **kwargs)
 
+
 class ArgConfig(object):
     def __init__(self):
         parser = argparse.ArgumentParser()
 
-        model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
-        model_g.add_arg("config_path", str, None, "Path to the json file for EmoTect model config.")
-        model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.")
-        model_g.add_arg("output_dir", str, None, "Directory path to save checkpoints")
-        model_g.add_arg("task_mode", str, None, "task mode: pairwise or pointwise")
-
+        model_g = ArgumentGroup(parser, "model",
+                                "model configuration and paths.")
+        model_g.add_arg("config_path", str, None,
+                        "Path to the json file for EmoTect model config.")
+        model_g.add_arg("init_checkpoint", str, None,
+                        "Init checkpoint to resume training from.")
+        model_g.add_arg("output_dir", str, None,
+                        "Directory path to save checkpoints")
+        model_g.add_arg("task_mode", str, None,
+                        "task mode: pairwise or pointwise")
         train_g = ArgumentGroup(parser, "training", "training options.")
         train_g.add_arg("epoch", int, 10, "Number of epoches for training.")
-        train_g.add_arg("save_steps", int, 200, "The steps interval to save checkpoints.")
-        train_g.add_arg("validation_steps", int, 100, "The steps interval to evaluate model performance.")
+        train_g.add_arg("save_steps", int, 200,
+                        "The steps interval to save checkpoints.")
+        train_g.add_arg("validation_steps", int, 100,
+                        "The steps interval to evaluate model performance.")
 
         log_g = ArgumentGroup(parser, "logging", "logging related")
-        log_g.add_arg("skip_steps", int, 10, "The steps interval to print loss.")
-        log_g.add_arg("verbose_result", bool, True, "Whether to output verbose result.")
-        log_g.add_arg("test_result_path", str, "test_result", "Directory path to test result.")
-        log_g.add_arg("infer_result_path", str, "infer_result", "Directory path to infer result.")
-
-        data_g = ArgumentGroup(parser, "data", "Data paths, vocab paths and data processing options")
-        data_g.add_arg("train_data_dir", str, None, "Directory path to training data.")
-        data_g.add_arg("valid_data_dir", str, None, "Directory path to valid data.")
-        data_g.add_arg("test_data_dir", str, None, "Directory path to testing data.")
-        data_g.add_arg("infer_data_dir", str, None, "Directory path to infer data.")
+        log_g.add_arg("skip_steps", int, 10,
+                      "The steps interval to print loss.")
+        log_g.add_arg("verbose_result", bool, True,
+                      "Whether to output verbose result.")
+        log_g.add_arg("test_result_path", str, "test_result",
+                      "Directory path to test result.")
+        log_g.add_arg("infer_result_path", str, "infer_result",
+                      "Directory path to infer result.")
+
+        data_g = ArgumentGroup(
+            parser, "data",
+            "Data paths, vocab paths and data processing options")
+        data_g.add_arg("train_data_dir", str, None,
+                       "Directory path to training data.")
+        data_g.add_arg("valid_data_dir", str, None,
+                       "Directory path to valid data.")
+        data_g.add_arg("test_data_dir", str, None,
+                       "Directory path to testing data.")
+        data_g.add_arg("infer_data_dir", str, None,
+                       "Directory path to infer data.")
         data_g.add_arg("vocab_path", str, None, "Vocabulary path.")
-        data_g.add_arg("batch_size", int, 32, "Total examples' number in batch for training.")
+        data_g.add_arg("batch_size", int, 32,
+                       "Total examples' number in batch for training.")
         data_g.add_arg("seq_len", int, 32, "The length of each sentence.")
-
         run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
-        run_type_g.add_arg("use_cuda", bool, False, "If set, use GPU for training.")
-        run_type_g.add_arg("task_name", str, None, "The name of task to perform sentiment classification.")
-        run_type_g.add_arg("do_train", bool, False, "Whether to perform training.")
+        run_type_g.add_arg("use_cuda", bool, False,
+                           "If set, use GPU for training.")
+        run_type_g.add_arg(
+            "task_name", str, None,
+            "The name of task to perform sentiment classification.")
+        run_type_g.add_arg("do_train", bool, False,
+                           "Whether to perform training.")
         run_type_g.add_arg("do_valid", bool, False, "Whether to perform dev.")
-        run_type_g.add_arg("do_test", bool, False, "Whether to perform testing.")
-        run_type_g.add_arg("do_infer", bool, False, "Whether to perform inference.")
-        run_type_g.add_arg("compute_accuracy", bool, False, "Whether to compute accuracy.")
-        run_type_g.add_arg("lamda", float, 0.91, "When task_mode is pairwise, lamda is the threshold for calculating the accuracy.")
+        run_type_g.add_arg("do_test", bool, False,
+                           "Whether to perform testing.")
+        run_type_g.add_arg("do_infer", bool, False,
+                           "Whether to perform inference.")
+        run_type_g.add_arg("compute_accuracy", bool, False,
+                           "Whether to compute accuracy.")
+        run_type_g.add_arg(
+            "lamda", float, 0.91,
+            "When task_mode is pairwise, lamda is the threshold for calculating the accuracy."
+        )
 
         custom_g = ArgumentGroup(parser, "customize", "customized options.")
         self.custom_g = custom_g
 
-        parser.add_argument('--enable_ce',action='store_true',help='If set, run the task with continuous evaluation logs.')
+        parser.add_argument(
+            '--enable_ce',
+            action='store_true',
+            help='If set, run the task with continuous evaluation logs.')
 
         self.parser = parser
 
@@ -355,7 +389,7 @@ def init_checkpoint(exe, init_checkpoint_path, main_program):
     """
     assert os.path.exists(
         init_checkpoint_path), "[%s] cann't be found." % init_checkpoint_path
-    
+
     def existed_persitables(var):
         if not fluid.io.is_persistable(var):
             return False
@@ -384,6 +418,26 @@ def load_dygraph(model_path, keep_name_table=False):
     if six.PY3:
         load_bak = pickle.load
         pickle.load = partial(load_bak, encoding="latin1")
-        para_dict, opti_dict = fluid.load_dygraph(model_path, keep_name_table)
+        para_dict, opti_dict = fluid.load_dygraph(model_path,
+                                                  keep_name_table)
         pickle.load = load_bak
-        return para_dict, opti_dict
\ No newline at end of file
+        return para_dict, opti_dict
+
+
+def seq_length(sequence):
+    """
+    get sequence length
+    for id-sequence, (N, S)
+    or vector-sequence (N, S, D)
+    """
+    if len(sequence.shape) == 2:
+        used = fluid.layers.sign(
+            fluid.layers.cast(fluid.layers.abs(sequence), np.float32))
+    else:
+        used = fluid.layers.sign(
+            fluid.layers.cast(
+                fluid.layers.reduce_max(fluid.layers.abs(sequence), 2),
+                np.float32))
+    length = fluid.layers.reduce_sum(used, 1)
+    length = fluid.layers.cast(length, np.int32)
+    return length
-- 
GitLab
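
Reviewer note (not part of the patch): the new last-step extraction rests on two pieces — seq_length() recovers each example's true length from the zero-padded id matrix, and ExtractLastLayer.ops() turns those lengths into flat gather indices. The same arithmetic can be sanity-checked with a minimal NumPy sketch; the helper names seq_length_np and extract_last_np below are illustrative, not repo code:

    import numpy as np

    def seq_length_np(ids):
        # ids: [batch, max_len] zero-padded word ids; pad id 0 makes
        # sign(|ids|) a 0/1 mask, so the row sum is the true length
        return np.sign(np.abs(ids)).sum(axis=1).astype("int32")

    def extract_last_np(hidden, lengths):
        # hidden: [batch, max_len, emb]; flatten to [batch * max_len, emb]
        # and pick row b * max_len + (lengths[b] - 1) for each example b
        batch, max_len, emb = hidden.shape
        index = np.arange(batch, dtype="int32") * max_len + (lengths - 1)
        return hidden.reshape(-1, emb)[index]

    ids = np.array([[3, 5, 0, 0], [7, 2, 9, 0]])      # true lengths: 2 and 3
    hidden = np.random.rand(2, 4, 8).astype("float32")
    last = extract_last_np(hidden, seq_length_np(ids))
    assert np.allclose(last[0], hidden[0, 1])         # last valid step of row 0
    assert np.allclose(last[1], hidden[1, 2])         # last valid step of row 1

This mirrors fluid.layers.range(0, batch_size, 1, 'int32') * max_length + (seq_length - 1) followed by reshape and gather in ExtractLastLayer, and it works only because reader.py pads with id 0 (padding_text), so id 0 must stay reserved for padding in the vocabulary.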