From bb9803a052212e489a167a7a02ab6116777f2ebc Mon Sep 17 00:00:00 2001 From: wangxiao1021 Date: Thu, 23 Apr 2020 17:21:06 +0800 Subject: [PATCH] remove dropout in predict, fix #77, update postprocess --- paddlepalm/backbone/bert.py | 4 ++-- paddlepalm/backbone/ernie.py | 4 ++-- paddlepalm/head/cls.py | 21 ++++++++++++--------- paddlepalm/head/match.py | 27 +++++++++++++++------------ paddlepalm/head/mlm.py | 16 +++++++++------- paddlepalm/head/mrc.py | 30 +++++++++++++++--------------- paddlepalm/head/ner.py | 12 ++++++------ 7 files changed, 61 insertions(+), 53 deletions(-) diff --git a/paddlepalm/backbone/bert.py b/paddlepalm/backbone/bert.py index 2872db7..46924fc 100644 --- a/paddlepalm/backbone/bert.py +++ b/paddlepalm/backbone/bert.py @@ -42,8 +42,8 @@ class BERT(Backbone): self._hidden_act = hidden_act - self._prepostprocess_dropout = hidden_dropout_prob - self._attention_dropout = attention_probs_dropout_prob + self._prepostprocess_dropout = 0. if phase == 'predict' else hidden_dropout_prob + self._attention_dropout = 0. if phase == 'predict' else attention_probs_dropout_prob self._word_emb_name = "word_embedding" self._pos_emb_name = "pos_embedding" diff --git a/paddlepalm/backbone/ernie.py b/paddlepalm/backbone/ernie.py index bbfbde6..5619377 100644 --- a/paddlepalm/backbone/ernie.py +++ b/paddlepalm/backbone/ernie.py @@ -45,8 +45,8 @@ class ERNIE(Backbone): self._task_types = task_type_vocab_size self._hidden_act = hidden_act - self._prepostprocess_dropout = hidden_dropout_prob - self._attention_dropout = attention_probs_dropout_prob + self._prepostprocess_dropout = 0. if phase == 'predict' else hidden_dropout_prob + self._attention_dropout = 0. if phase == 'predict' else attention_probs_dropout_prob self._word_emb_name = "word_embedding" self._pos_emb_name = "pos_embedding" diff --git a/paddlepalm/head/cls.py b/paddlepalm/head/cls.py index 4da3580..e0022ea 100644 --- a/paddlepalm/head/cls.py +++ b/paddlepalm/head/cls.py @@ -94,14 +94,17 @@ class Classify(Head): def epoch_postprocess(self, post_inputs, output_dir=None): # there is no post_inputs needed and not declared in epoch_inputs_attrs, hence no elements exist in post_inputs if not self._is_training: - if output_dir is None: - raise ValueError('argument output_dir not found in config. Please add it into config dict/file.') - with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: - for i in range(len(self._preds)): - label = int(np.argmax(np.array(self._preds[i]))) - result = {'index': i, 'label': label, 'logits': self._preds[i], 'probs': self._probs[i]} - result = json.dumps(result) - writer.write(result+'\n') - print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + results = [] + for i in range(len(self._preds)): + label = int(np.argmax(np.array(self._preds[i]))) + result = {'index': i, 'label': label, 'logits': self._preds[i], 'probs': self._probs[i]} + results.append(result) + if output_dir is not None: + with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: + for result in results: + result = json.dumps(result) + writer.write(result+'\n') + print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + return results diff --git a/paddlepalm/head/match.py b/paddlepalm/head/match.py index 38cf1b2..4921f6c 100644 --- a/paddlepalm/head/match.py +++ b/paddlepalm/head/match.py @@ -174,15 +174,18 @@ class Match(Head): def epoch_postprocess(self, post_inputs, output_dir=None): # there is no post_inputs needed and not declared in epoch_inputs_attrs, hence no elements exist in post_inputs if not self._is_training: - if output_dir is None: - raise ValueError('argument output_dir not found in config. Please add it into config dict/file.') - with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: - for i in range(len(self._preds)): - if self._learning_strategy == 'pointwise': - label = int(np.argmax(np.array(self._preds[i]))) - result = {'index': i, 'label': label, 'logits': self._preds_logits[i], 'probs': self._preds[i]} - elif self._learning_strategy == 'pairwise': - result = {'index': i, 'probs': self._preds[i][0]} - result = json.dumps(result, ensure_ascii=False) - writer.write(result+'\n') - print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + results = [] + for i in range(len(self._preds)): + if self._learning_strategy == 'pointwise': + label = int(np.argmax(np.array(self._preds[i]))) + result = {'index': i, 'label': label, 'logits': self._preds_logits[i], 'probs': self._preds[i]} + elif self._learning_strategy == 'pairwise': + result = {'index': i, 'probs': self._preds[i][0]} + results.append(result) + if output_dir is not None: + with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: + for result in results: + result = json.dumps(result, ensure_ascii=False) + writer.write(result+'\n') + print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + return results diff --git a/paddlepalm/head/mlm.py b/paddlepalm/head/mlm.py index f725c1d..0bd6986 100644 --- a/paddlepalm/head/mlm.py +++ b/paddlepalm/head/mlm.py @@ -128,13 +128,15 @@ class MaskLM(Head): def epoch_postprocess(self, post_inputs, output_dir=None): # there is no post_inputs needed and not declared in epoch_inputs_attrs, hence no elements exist in post_inputs if not self._is_training: - if output_dir is None: - for p in self._preds: - print(p) - else: + results = [] + for i in range(len(self._preds)): + result = {'index': i, 'word_id': self._preds[i]} + results.append(result) + if output_dir is not None: with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: - for p in self._preds: - writer.write(str(p)+'\n') + for result in results: + result = json.dumps(result) + writer.write(result+'\n') print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) - + return results diff --git a/paddlepalm/head/mrc.py b/paddlepalm/head/mrc.py index dba4fe9..67211c7 100644 --- a/paddlepalm/head/mrc.py +++ b/paddlepalm/head/mrc.py @@ -154,21 +154,21 @@ class MRC(Head): """(optional interface) this func will be called after evaluation/predicting process and each epoch during training process.""" if not self._is_training: - if output_dir is None: - raise ValueError('argument output_dir not found in config. Please add it into config dict/file.') - examples = post_inputs['reader']['examples'] - features = post_inputs['reader']['features'] - if not os.path.exists(output_dir): - os.makedirs(output_dir) - output_prediction_file = os.path.join(output_dir, "predictions.json") - output_nbest_file = os.path.join(output_dir, "nbest_predictions.json") - output_null_log_odds_file = os.path.join(output_dir, "null_odds.json") - _write_predictions(examples, features, self._pred_results, - self._n_best_size, self._max_answer_length, - self._do_lower_case, output_prediction_file, - output_nbest_file, output_null_log_odds_file, - self._with_negative, - self._null_score_diff_threshold, self._verbose) + if output_dir is not None: + examples = post_inputs['reader']['examples'] + features = post_inputs['reader']['features'] + if not os.path.exists(output_dir): + os.makedirs(output_dir) + output_prediction_file = os.path.join(output_dir, "predictions.json") + output_nbest_file = os.path.join(output_dir, "nbest_predictions.json") + output_null_log_odds_file = os.path.join(output_dir, "null_odds.json") + _write_predictions(examples, features, self._pred_results, + self._n_best_size, self._max_answer_length, + self._do_lower_case, output_prediction_file, + output_nbest_file, output_null_log_odds_file, + self._with_negative, + self._null_score_diff_threshold, self._verbose) + return self._pred_results def _write_predictions(all_examples, all_features, all_results, n_best_size, diff --git a/paddlepalm/head/ner.py b/paddlepalm/head/ner.py index dfec122..9b6c67f 100644 --- a/paddlepalm/head/ner.py +++ b/paddlepalm/head/ner.py @@ -118,9 +118,9 @@ class SequenceLabel(Head): def epoch_postprocess(self, post_inputs, output_dir=None): # there is no post_inputs needed and not declared in epoch_inputs_attrs, hence no elements exist in post_inputs if not self._is_training: - if output_dir is None: - raise ValueError('argument output_dir not found in config. Please add it into config dict/file.') - with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: - for p in self._preds: - writer.write(str(p)+'\n') - print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + if output_dir is not None: + with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: + for p in self._preds: + writer.write(str(p)+'\n') + print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + return self._preds -- GitLab