diff --git a/paddlepalm/backbone/bert.py b/paddlepalm/backbone/bert.py index 2872db727fcc065298c88ccab3dedefbba171817..46924fc255a2f75a680711e908178544b3889f95 100644 --- a/paddlepalm/backbone/bert.py +++ b/paddlepalm/backbone/bert.py @@ -42,8 +42,8 @@ class BERT(Backbone): self._hidden_act = hidden_act - self._prepostprocess_dropout = hidden_dropout_prob - self._attention_dropout = attention_probs_dropout_prob + self._prepostprocess_dropout = 0. if phase == 'predict' else hidden_dropout_prob + self._attention_dropout = 0. if phase == 'predict' else attention_probs_dropout_prob self._word_emb_name = "word_embedding" self._pos_emb_name = "pos_embedding" diff --git a/paddlepalm/backbone/ernie.py b/paddlepalm/backbone/ernie.py index bbfbde65bb1f7fabfff30384b17edcc9387c8478..5619377f044ed90cfbfba2886326078b38321bb8 100644 --- a/paddlepalm/backbone/ernie.py +++ b/paddlepalm/backbone/ernie.py @@ -45,8 +45,8 @@ class ERNIE(Backbone): self._task_types = task_type_vocab_size self._hidden_act = hidden_act - self._prepostprocess_dropout = hidden_dropout_prob - self._attention_dropout = attention_probs_dropout_prob + self._prepostprocess_dropout = 0. if phase == 'predict' else hidden_dropout_prob + self._attention_dropout = 0. if phase == 'predict' else attention_probs_dropout_prob self._word_emb_name = "word_embedding" self._pos_emb_name = "pos_embedding" diff --git a/paddlepalm/head/cls.py b/paddlepalm/head/cls.py index 4da3580418a0c424e0aa17f67463b555f9344ac2..e0022ea622ecb0fef9a8d9dec8351e75823ec24b 100644 --- a/paddlepalm/head/cls.py +++ b/paddlepalm/head/cls.py @@ -94,14 +94,17 @@ class Classify(Head): def epoch_postprocess(self, post_inputs, output_dir=None): # there is no post_inputs needed and not declared in epoch_inputs_attrs, hence no elements exist in post_inputs if not self._is_training: - if output_dir is None: - raise ValueError('argument output_dir not found in config. Please add it into config dict/file.') - with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: - for i in range(len(self._preds)): - label = int(np.argmax(np.array(self._preds[i]))) - result = {'index': i, 'label': label, 'logits': self._preds[i], 'probs': self._probs[i]} - result = json.dumps(result) - writer.write(result+'\n') - print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + results = [] + for i in range(len(self._preds)): + label = int(np.argmax(np.array(self._preds[i]))) + result = {'index': i, 'label': label, 'logits': self._preds[i], 'probs': self._probs[i]} + results.append(result) + if output_dir is not None: + with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: + for result in results: + result = json.dumps(result) + writer.write(result+'\n') + print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + return results diff --git a/paddlepalm/head/match.py b/paddlepalm/head/match.py index 38cf1b2389ddb7426f338d86146c10bf9ffd2ce7..4921f6cca5f7785e17a061646dcfd38431dfb5c7 100644 --- a/paddlepalm/head/match.py +++ b/paddlepalm/head/match.py @@ -174,15 +174,18 @@ class Match(Head): def epoch_postprocess(self, post_inputs, output_dir=None): # there is no post_inputs needed and not declared in epoch_inputs_attrs, hence no elements exist in post_inputs if not self._is_training: - if output_dir is None: - raise ValueError('argument output_dir not found in config. Please add it into config dict/file.') - with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: - for i in range(len(self._preds)): - if self._learning_strategy == 'pointwise': - label = int(np.argmax(np.array(self._preds[i]))) - result = {'index': i, 'label': label, 'logits': self._preds_logits[i], 'probs': self._preds[i]} - elif self._learning_strategy == 'pairwise': - result = {'index': i, 'probs': self._preds[i][0]} - result = json.dumps(result, ensure_ascii=False) - writer.write(result+'\n') - print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + results = [] + for i in range(len(self._preds)): + if self._learning_strategy == 'pointwise': + label = int(np.argmax(np.array(self._preds[i]))) + result = {'index': i, 'label': label, 'logits': self._preds_logits[i], 'probs': self._preds[i]} + elif self._learning_strategy == 'pairwise': + result = {'index': i, 'probs': self._preds[i][0]} + results.append(result) + if output_dir is not None: + with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: + for result in results: + result = json.dumps(result, ensure_ascii=False) + writer.write(result+'\n') + print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + return results diff --git a/paddlepalm/head/mlm.py b/paddlepalm/head/mlm.py index f725c1d3b8bb450311fc98e4673114f7f49ac3f6..0bd69869df95664b3d1da797a74dec195cbaf724 100644 --- a/paddlepalm/head/mlm.py +++ b/paddlepalm/head/mlm.py @@ -128,13 +128,15 @@ class MaskLM(Head): def epoch_postprocess(self, post_inputs, output_dir=None): # there is no post_inputs needed and not declared in epoch_inputs_attrs, hence no elements exist in post_inputs if not self._is_training: - if output_dir is None: - for p in self._preds: - print(p) - else: + results = [] + for i in range(len(self._preds)): + result = {'index': i, 'word_id': self._preds[i]} + results.append(result) + if output_dir is not None: with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: - for p in self._preds: - writer.write(str(p)+'\n') + for result in results: + result = json.dumps(result) + writer.write(result+'\n') print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) - + return results diff --git a/paddlepalm/head/mrc.py b/paddlepalm/head/mrc.py index dba4fe913b8e6fa5a78359268725dfea97949070..67211c758a71727939e8949fa65ffdb37facad41 100644 --- a/paddlepalm/head/mrc.py +++ b/paddlepalm/head/mrc.py @@ -154,21 +154,21 @@ class MRC(Head): """(optional interface) this func will be called after evaluation/predicting process and each epoch during training process.""" if not self._is_training: - if output_dir is None: - raise ValueError('argument output_dir not found in config. Please add it into config dict/file.') - examples = post_inputs['reader']['examples'] - features = post_inputs['reader']['features'] - if not os.path.exists(output_dir): - os.makedirs(output_dir) - output_prediction_file = os.path.join(output_dir, "predictions.json") - output_nbest_file = os.path.join(output_dir, "nbest_predictions.json") - output_null_log_odds_file = os.path.join(output_dir, "null_odds.json") - _write_predictions(examples, features, self._pred_results, - self._n_best_size, self._max_answer_length, - self._do_lower_case, output_prediction_file, - output_nbest_file, output_null_log_odds_file, - self._with_negative, - self._null_score_diff_threshold, self._verbose) + if output_dir is not None: + examples = post_inputs['reader']['examples'] + features = post_inputs['reader']['features'] + if not os.path.exists(output_dir): + os.makedirs(output_dir) + output_prediction_file = os.path.join(output_dir, "predictions.json") + output_nbest_file = os.path.join(output_dir, "nbest_predictions.json") + output_null_log_odds_file = os.path.join(output_dir, "null_odds.json") + _write_predictions(examples, features, self._pred_results, + self._n_best_size, self._max_answer_length, + self._do_lower_case, output_prediction_file, + output_nbest_file, output_null_log_odds_file, + self._with_negative, + self._null_score_diff_threshold, self._verbose) + return self._pred_results def _write_predictions(all_examples, all_features, all_results, n_best_size, diff --git a/paddlepalm/head/ner.py b/paddlepalm/head/ner.py index dfec122d623a8b8a54463de1b75b328f7402afa5..9b6c67f62a7f2f96efbed2356e55b43a196d436b 100644 --- a/paddlepalm/head/ner.py +++ b/paddlepalm/head/ner.py @@ -118,9 +118,9 @@ class SequenceLabel(Head): def epoch_postprocess(self, post_inputs, output_dir=None): # there is no post_inputs needed and not declared in epoch_inputs_attrs, hence no elements exist in post_inputs if not self._is_training: - if output_dir is None: - raise ValueError('argument output_dir not found in config. Please add it into config dict/file.') - with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: - for p in self._preds: - writer.write(str(p)+'\n') - print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + if output_dir is not None: + with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: + for p in self._preds: + writer.write(str(p)+'\n') + print('Predictions saved at '+os.path.join(output_dir, 'predictions.json')) + return self._preds