提交 431f46fb 编写于 作者: C caoying03

fix infer.

上级 f0a11911
#!/usr/bin/env python #!/usr/bin/env python
#coding=utf-8 #coding=utf-8
import pdb
import collections import collections
import paddle.v2 as paddle import paddle.v2 as paddle
......
#!/usr/bin/env python
#coding=utf-8
import pdb
import numpy as np
__all__ = ["BeamDecoding"]
class BeamDecoding(object):
    """Reconstructs answers from the outputs of the three-step beam search.

    The answer-search network runs three chained beams:
      1. beam1 selects candidate sentences inside every document,
      2. beam2 selects the answer-span start inside every selected sentence,
      3. beam3 selects the answer-span end for every selected start.
    Given the scores and selected indices of all three beams (rows padded
    with -1), this class walks the search paths backwards
    (end -> start -> sentence), accumulates the path scores, and returns
    all answers of each sample sorted by their total score.
    """

    def __init__(self, documents, sentence_scores, selected_sentences,
                 start_scores, selected_starts, end_scores, selected_ends):
        """
        Args:
            documents: Documents of one batch; each document is a list of
                sentences, each sentence a list of word ids.
            sentence_scores: Flattened scores of all sentences in the batch.
            selected_sentences: (seq_num, beam_size) sentence indices chosen
                by beam1, padded with -1.
            start_scores: Flattened scores of all start positions inside the
                selected sentences.
            selected_starts: (sub_seq_num, beam_size) start offsets chosen
                by beam2, padded with -1.
            end_scores: Flattened scores of all end positions inside the
                selected start spans.
            selected_ends: (sub_seq_num, beam_size) end offsets chosen by
                beam3, padded with -1.
        """
        self.documents = documents
        self.sentence_scores = sentence_scores
        self.selected_sentences = selected_sentences
        self.start_scores = start_scores
        self.selected_starts = selected_starts
        self.end_scores = end_scores
        self.selected_ends = selected_ends

        # Sequence start position information for the three-step search.
        # beam1 searches the sentence index.
        self.beam1_seq_start_positions = []
        # beam2 searches the answer-span start position.
        self.beam2_seq_start_positions = []
        # beam3 searches the answer-span end position.
        self.beam3_seq_start_positions = []

        # Cumulative answer counts per sample in the batch.
        self.ans_per_sample_in_a_batch = [0]
        # Every searched path, before grouping per sample.
        self.all_searched_ans = []
        # Final per-sample answers, sorted by descending score.
        self.final_ans = [[] for i in range(len(documents))]

    def _build_beam1_seq_info(self):
        """Compute cumulative sentence start offsets per document (beam1)."""
        self.beam1_seq_start_positions.append([0])
        for one_doc in self.documents:
            for sentence in one_doc:
                self.beam1_seq_start_positions[-1].append(
                    self.beam1_seq_start_positions[-1][-1] + len(sentence))
            # Open the offset list of the next document; it starts at the
            # running total so all offsets are global to the batch.
            if len(self.beam1_seq_start_positions) != len(self.documents):
                self.beam1_seq_start_positions.append(
                    [self.beam1_seq_start_positions[-1][-1]])

    def _build_beam2_seq_info(self):
        """Compute start offsets of the sentences selected by beam1."""
        seq_num, beam_size = self.selected_sentences.shape
        self.beam2_seq_start_positions.append([0])
        for i in range(seq_num):
            for j in range(beam_size):
                selected_id = int(self.selected_sentences[i][j])
                if selected_id == -1: break
                seq_len = self.beam1_seq_start_positions[i][
                    selected_id + 1] - self.beam1_seq_start_positions[i][
                        selected_id]
                self.beam2_seq_start_positions[-1].append(
                    self.beam2_seq_start_positions[-1][-1] + seq_len)
            if len(self.beam2_seq_start_positions) != seq_num:
                self.beam2_seq_start_positions.append(
                    [self.beam2_seq_start_positions[-1][-1]])

    def _build_beam3_seq_info(self):
        """Compute start offsets of the start spans selected by beam2."""
        seq_num_in_a_batch = len(self.documents)

        seq_id = 0
        sub_seq_id = 0
        sub_seq_count = len(self.beam2_seq_start_positions[seq_id]) - 1

        self.beam3_seq_start_positions.append([0])
        sub_seq_num, beam_size = self.selected_starts.shape
        for i in range(sub_seq_num):
            seq_len = self.beam2_seq_start_positions[seq_id][
                sub_seq_id + 1] - self.beam2_seq_start_positions[seq_id][
                    sub_seq_id]
            for j in range(beam_size):
                start_id = int(self.selected_starts[i][j])
                if start_id == -1: break
                # A span starting at start_id runs to the sentence end,
                # hence its length is seq_len - start_id.
                self.beam3_seq_start_positions[-1].append(
                    self.beam3_seq_start_positions[-1][-1] + seq_len -
                    start_id)
            sub_seq_id += 1
            if sub_seq_id == sub_seq_count:
                # Only advance while there are more documents; on the last
                # one the loop simply ends.
                if len(self.beam3_seq_start_positions) != seq_num_in_a_batch:
                    self.beam3_seq_start_positions.append(
                        [self.beam3_seq_start_positions[-1][-1]])
                    sub_seq_id = 0
                    seq_id += 1
                    sub_seq_count = len(
                        self.beam2_seq_start_positions[seq_id]) - 1
        assert (
            self.beam3_seq_start_positions[-1][-1] == self.end_scores.shape[0])

    def _build_seq_info_for_each_beam(self):
        """Build the offset bookkeeping for all three beams in order."""
        self._build_beam1_seq_info()
        self._build_beam2_seq_info()
        self._build_beam3_seq_info()

    def _cal_ans_per_sample_in_a_batch(self):
        """Count (cumulatively) how many answers each sample produced."""
        start_row = 0
        for seq in self.beam3_seq_start_positions:
            end_row = start_row + len(seq) - 1
            # Every non-padding entry in selected_ends is one answer.
            ans_count = np.sum(self.selected_ends[start_row:end_row, :] != -1.)
            self.ans_per_sample_in_a_batch.append(
                self.ans_per_sample_in_a_batch[-1] + ans_count)
            start_row = end_row

    # NOTE: method name keeps its historical misspelling ("seleceted") so
    # any external caller keeps working; `slef` -> `self` typo fixed.
    def _get_valid_seleceted_ids(self, mat):
        """Flatten a beam-selection matrix into [value, [row, col]] pairs.

        A row is read up to (excluding) its first -1 padding entry.
        """
        flattened = []
        height, width = mat.shape
        for i in range(height):
            for j in range(width):
                if mat[i][j] == -1.: break
                flattened.append([int(mat[i][j]), [i, j]])
        return flattened

    def decoding(self):
        """Walk the three beams backwards and return the final answers.

        Returns:
            One list per sample; each entry is a dict
            {"score": total_score, "label": [sentence, start, end]},
            sorted by descending score.

        NOTE(review): the incremental (seq_id, sub_seq_id) bookkeeping
        below assumes every beam row is fully populated before its first
        -1 padding; confirm this holds for early-stopped beams.
        """
        self._build_seq_info_for_each_beam()
        self._cal_ans_per_sample_in_a_batch()

        # Step 1: collect every selected end position together with its
        # end score and the row (parent) of the start beam it came from.
        seq_id = 0
        sub_seq_id = 0
        sub_seq_count = len(self.beam3_seq_start_positions[seq_id]) - 1
        sub_seq_num, beam_size = self.selected_ends.shape
        for i in range(sub_seq_num):
            seq_offset_in_batch = self.beam3_seq_start_positions[seq_id][
                sub_seq_id]
            for j in range(beam_size):
                end_pos = int(self.selected_ends[i][j])
                if end_pos == -1: break
                self.all_searched_ans.append({
                    "score": self.end_scores[seq_offset_in_batch + end_pos],
                    "sentence_pos": -1,
                    "start_span_pos": -1,
                    "end_span_pos": end_pos,
                    "parent_ids_in_prev_beam": i
                })
            sub_seq_id += 1
            if sub_seq_id == sub_seq_count:
                seq_id += 1
                if seq_id == len(self.beam3_seq_start_positions): break
                sub_seq_id = 0
                sub_seq_count = len(self.beam3_seq_start_positions[seq_id]) - 1
        assert len(self.all_searched_ans) == self.ans_per_sample_in_a_batch[-1]

        # Step 2: trace back into the start beam and add the start scores.
        seq_id = 0
        sub_seq_id = 0
        sub_seq_count = len(self.beam2_seq_start_positions[seq_id]) - 1
        last_row_id = None
        starts = self._get_valid_seleceted_ids(self.selected_starts)
        for ans in self.all_searched_ans:
            ans["start_span_pos"] = starts[ans["parent_ids_in_prev_beam"]][0]
            seq_offset_in_batch = (
                self.beam2_seq_start_positions[seq_id][sub_seq_id])
            ans["score"] += self.start_scores[(
                seq_offset_in_batch + ans["start_span_pos"])]
            # Replace the parent by its row in the start beam, i.e. the
            # flat index of the selected sentence it belongs to.
            ans["parent_ids_in_prev_beam"] = starts[ans[
                "parent_ids_in_prev_beam"]][1][0]
            # Advance to the next sub-sequence when the parent row changes.
            # Fixed: bare `if last_row_id:` treated the valid row id 0 as
            # falsy, so transitions leaving row 0 were never detected.
            # NOTE(review): the advance happens after the current answer's
            # offset was already used; confirm the first answer of a new
            # row is really meant to use the previous offset.
            if last_row_id is not None and \
                    last_row_id != ans["parent_ids_in_prev_beam"]:
                sub_seq_id += 1
                if sub_seq_id == sub_seq_count:
                    seq_id += 1
                    if seq_id == len(self.beam2_seq_start_positions): break
                    sub_seq_count = len(
                        self.beam2_seq_start_positions[seq_id]) - 1
                    sub_seq_id = 0
            last_row_id = ans["parent_ids_in_prev_beam"]

        # Step 3: trace back into the sentence beam, add sentence scores.
        offset_info = [0]
        for sen in self.beam1_seq_start_positions[:-1]:
            offset_info.append(offset_info[-1] + len(sen) - 1)
        sen_ids = self._get_valid_seleceted_ids(self.selected_sentences)
        for ans in self.all_searched_ans:
            ans["sentence_pos"] = sen_ids[ans["parent_ids_in_prev_beam"]][0]
            # `//`: explicit floor division (the original py2-only `/`).
            row_id = ans["parent_ids_in_prev_beam"] // beam_size
            # NOTE(review): offset_info already starts with 0, so
            # `offset_info[row_id - 1]` looks off by one for row_id >= 1
            # (`offset_info[row_id]` would be the cumulative sentence count
            # of the preceding documents) — verify with a multi-document
            # batch before relying on batched inference.
            offset = offset_info[row_id - 1] if row_id else 0
            ans["score"] += self.sentence_scores[offset + ans["sentence_pos"]]

        # Group the answers per sample and sort them by total score.
        for i in range(len(self.ans_per_sample_in_a_batch) - 1):
            start_pos = self.ans_per_sample_in_a_batch[i]
            end_pos = self.ans_per_sample_in_a_batch[i + 1]
            for ans in sorted(
                    self.all_searched_ans[start_pos:end_pos],
                    key=lambda x: x["score"],
                    reverse=True):
                self.final_ans[i].append({
                    "score": ans["score"],
                    "label": [
                        ans["sentence_pos"], ans["start_span_pos"],
                        ans["end_span_pos"]
                    ]
                })
        return self.final_ans
...@@ -27,10 +27,13 @@ class TrainerConfig(object): ...@@ -27,10 +27,13 @@ class TrainerConfig(object):
data_dir = "data/featurized" data_dir = "data/featurized"
save_dir = "models" save_dir = "models"
train_batch_size = 4 * 10 use_gpu = True
test_batch_size = 1 trainer_count = 4
train_batch_size = trainer_count * 10
epochs = 100 test_batch_size = 4
epochs = 20
# for debug print, if set to 0, no information will be printed. # for debug print, if set to 0, no information will be printed.
show_parameter_status_period = 0 show_parameter_status_period = 0
......
...@@ -5,7 +5,6 @@ import sys ...@@ -5,7 +5,6 @@ import sys
import gzip import gzip
import logging import logging
import numpy as np import numpy as np
import pdb
import paddle.v2 as paddle import paddle.v2 as paddle
from paddle.v2.layer import parse_network from paddle.v2.layer import parse_network
...@@ -14,6 +13,7 @@ import reader ...@@ -14,6 +13,7 @@ import reader
from model import GNR from model import GNR
from train import choose_samples from train import choose_samples
from config import ModelConfig, TrainerConfig from config import ModelConfig, TrainerConfig
from beam_decoding import BeamDecoding
logger = logging.getLogger("paddle") logger = logging.getLogger("paddle")
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
...@@ -27,67 +27,44 @@ def load_reverse_dict(dict_file): ...@@ -27,67 +27,44 @@ def load_reverse_dict(dict_file):
return word_dict return word_dict
def parse_one_sample(raw_input_doc, sub_sen_scores, selected_sentence, def print_result(test_batch, predicted_ans, ids_2_word, print_top_k=1):
start_span_scores, selected_starts, end_span_scores, for i, sample in enumerate(test_batch):
selected_ends): query_words = [ids_2_word[ids] for ids in sample[0]]
assert len(raw_input_doc) == sub_sen_scores.shape[0] print("query:\t%s" % (" ".join(query_words)))
beam_size = selected_sentence.shape[1]
print("documents:")
all_searched_ans = [] for j, sen in enumerate(sample[1]):
for i in xrange(selected_ends.shape[0]): sen_words = [ids_2_word[ids] for ids in sen]
for j in xrange(selected_ends.shape[1]): start = sample[4]
if selected_ends[i][j] == -1.: break end = sample[4] + sample[5] + 1
all_searched_ans.append({ print("%d\t%s" % (j, " ".join(sen_words)))
'score': end_span_scores[int(selected_ends[i][j])], print("gold:\t[%d %d %d] %s" % (
'sentence_pos': -1, sample[3], sample[4], sample[5], " ".join(
'start_span_pos': -1, [ids_2_word[ids] for ids in sample[1][sample[3]][start:end]])))
'end_span_pos': int(selected_ends[i][j]),
'parent_ids_in_prev_beam': i print("predicted:")
}) for k in range(print_top_k):
label = predicted_ans[i][k]["label"]
for path in all_searched_ans: start = label[1]
row_id = path['parent_ids_in_prev_beam'] / beam_size end = label[1] + label[2] + 1
col_id = path['parent_ids_in_prev_beam'] % beam_size ans_words = [
path['start_span_pos'] = int(selected_starts[row_id][col_id]) ids_2_word[ids] for ids in sample[1][label[0]][start:end]
path['score'] += start_span_scores[path['start_span_pos']] ]
path['parent_ids_in_prev_beam'] = row_id print("%.4f\t[%d %d %d] %s" %
(predicted_ans[i][k]["score"], label[0], label[1], label[2],
for path in all_searched_ans: " ".join(ans_words)))
row_id = path['parent_ids_in_prev_beam'] / beam_size print("\n")
col_id = path['parent_ids_in_prev_beam'] % beam_size
path['sentence_pos'] = int(selected_sentence[row_id][col_id])
path['score'] += sub_sen_scores[path['sentence_pos']]
all_searched_ans.sort(key=lambda x: x['score'], reverse=True)
return all_searched_ans
def infer_a_batch(inferer, test_batch, ids_2_word, out_layer_count): def infer_a_batch(inferer, test_batch, ids_2_word, out_layer_count):
outs = inferer.infer(input=test_batch, flatten_result=False, field="value") outs = inferer.infer(input=test_batch, flatten_result=False, field="value")
decoder = BeamDecoding([sample[1] for sample in test_batch], *outs)
for test_sample in test_batch: print_result(test_batch, decoder.decoding(), ids_2_word, print_top_k=10)
query_word = [ids_2_word[ids] for ids in test_sample[0]]
print("query\n\t%s\ndocument" % (" ".join(query_word)))
# iterate over each word of in document
for i, sentence in enumerate(test_sample[1]):
sen_word = [ids_2_word[ids] for ids in sentence]
print("%d\t%s" % (i, " ".join(sen_word)))
print("gold\t[%d %d %d]" %
(test_sample[3], test_sample[4], test_sample[5]))
ans = parse_one_sample(test_sample[1], *outs)[0]
ans_ids = test_sample[1][ans['sentence_pos']][ans['start_span_pos']:ans[
'start_span_pos'] + ans['end_span_pos']]
ans_str = " ".join([ids_2_word[ids] for ids in ans_ids])
print("searched answer\t[%d %d %d]\n\t%s" %
(ans['sentence_pos'], ans['start_span_pos'], ans['end_span_pos'],
ans_str))
def infer(model_path, data_dir, test_batch_size, config): def infer(model_path, data_dir, test_batch_size, config):
assert os.path.exists(model_path), "The model does not exist." assert os.path.exists(model_path), "The model does not exist."
paddle.init(use_gpu=False, trainer_count=1) paddle.init(use_gpu=True, trainer_count=1)
ids_2_word = load_reverse_dict(config.dict_path) ids_2_word = load_reverse_dict(config.dict_path)
...@@ -96,6 +73,8 @@ def infer(model_path, data_dir, test_batch_size, config): ...@@ -96,6 +73,8 @@ def infer(model_path, data_dir, test_batch_size, config):
# load the trained models # load the trained models
parameters = paddle.parameters.Parameters.from_tar( parameters = paddle.parameters.Parameters.from_tar(
gzip.open(model_path, "r")) gzip.open(model_path, "r"))
logger.info("loading parameter is done.")
inferer = paddle.inference.Inference( inferer = paddle.inference.Inference(
output_layer=outputs, parameters=parameters) output_layer=outputs, parameters=parameters)
...@@ -115,5 +94,6 @@ def infer(model_path, data_dir, test_batch_size, config): ...@@ -115,5 +94,6 @@ def infer(model_path, data_dir, test_batch_size, config):
if __name__ == "__main__": if __name__ == "__main__":
infer("models/pass_00003.tar.gz", TrainerConfig.data_dir, # infer("models/round1/pass_00000.tar.gz", TrainerConfig.data_dir,
infer("models/round2_on_cpu/pass_00000.tar.gz", TrainerConfig.data_dir,
TrainerConfig.test_batch_size, ModelConfig) TrainerConfig.test_batch_size, ModelConfig)
#!/usr/bin/env python #!/usr/bin/env python
#coding=utf-8 #coding=utf-8
import pdb
import paddle.v2 as paddle import paddle.v2 as paddle
from paddle.v2.layer import parse_network from paddle.v2.layer import parse_network
import basic_modules import basic_modules
...@@ -35,6 +33,7 @@ def encode_question(input_embedding, config, prefix): ...@@ -35,6 +33,7 @@ def encode_question(input_embedding, config, prefix):
act=paddle.activation.Linear()) act=paddle.activation.Linear())
weights = paddle.layer.fc(input=lstm_outs, weights = paddle.layer.fc(input=lstm_outs,
size=1, size=1,
bias_attr=False,
act=paddle.activation.SequenceSoftmax()) act=paddle.activation.SequenceSoftmax())
weighted_candidates = paddle.layer.scaling(input=candidates, weight=weights) weighted_candidates = paddle.layer.scaling(input=candidates, weight=weights)
passage_indep_embedding = paddle.layer.pooling( passage_indep_embedding = paddle.layer.pooling(
...@@ -63,7 +62,12 @@ def question_aligned_passage_embedding(question_lstm_outs, document_embeddings, ...@@ -63,7 +62,12 @@ def question_aligned_passage_embedding(question_lstm_outs, document_embeddings,
weights = paddle.layer.fc( weights = paddle.layer.fc(
input=[question_lstm_outs, doc_word_expand], input=[question_lstm_outs, doc_word_expand],
param_attr=[
paddle.attr.Param(initial_std=1e-3),
paddle.attr.Param(initial_std=1e-3)
],
size=1, size=1,
bias_attr=False,
act=paddle.activation.SequenceSoftmax()) act=paddle.activation.SequenceSoftmax())
weighted_candidates = paddle.layer.scaling( weighted_candidates = paddle.layer.scaling(
input=question_outs_proj, weight=weights) input=question_outs_proj, weight=weights)
...@@ -111,9 +115,11 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config, ...@@ -111,9 +115,11 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config,
is_infer): is_infer):
last_state_of_sentence = paddle.layer.last_seq( last_state_of_sentence = paddle.layer.last_seq(
input=doc_lstm_outs, agg_level=paddle.layer.AggregateLevel.TO_SEQUENCE) input=doc_lstm_outs, agg_level=paddle.layer.AggregateLevel.TO_SEQUENCE)
sentence_scores = paddle.layer.fc(input=last_state_of_sentence, sentence_scores = paddle.layer.fc(
input=last_state_of_sentence,
size=1, size=1,
bias_attr=False, bias_attr=False,
param_attr=paddle.attr.Param(initial_std=1e-3),
act=paddle.activation.Linear()) act=paddle.activation.Linear())
topk_sentence_ids = paddle.layer.kmax_sequence_score( topk_sentence_ids = paddle.layer.kmax_sequence_score(
input=sentence_scores, beam_size=config.beam_size) input=sentence_scores, beam_size=config.beam_size)
...@@ -121,9 +127,13 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config, ...@@ -121,9 +127,13 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config,
input=doc_lstm_outs, selected_indices=topk_sentence_ids) input=doc_lstm_outs, selected_indices=topk_sentence_ids)
# expand beam to search start positions on selected sentences # expand beam to search start positions on selected sentences
start_pos_scores = paddle.layer.fc(input=topk_sen, start_pos_scores = paddle.layer.fc(
input=topk_sen,
size=1, size=1,
layer_attr=paddle.attr.ExtraLayerAttribute(
error_clipping_threshold=10.0),
bias_attr=False, bias_attr=False,
param_attr=paddle.attr.Param(initial_std=1e-3),
act=paddle.activation.Linear()) act=paddle.activation.Linear())
topk_start_pos_ids = paddle.layer.kmax_sequence_score( topk_start_pos_ids = paddle.layer.kmax_sequence_score(
input=start_pos_scores, beam_size=config.beam_size) input=start_pos_scores, beam_size=config.beam_size)
...@@ -137,9 +147,11 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config, ...@@ -137,9 +147,11 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config,
depth=config.lstm_depth, depth=config.lstm_depth,
drop_rate=config.lstm_hidden_droprate, drop_rate=config.lstm_hidden_droprate,
prefix="__end_span_embeddings__") prefix="__end_span_embeddings__")
end_pos_scores = paddle.layer.fc(input=end_span_embedding, end_pos_scores = paddle.layer.fc(
input=end_span_embedding,
size=1, size=1,
bias_attr=False, bias_attr=False,
param_attr=paddle.attr.Param(initial_std=1e-3),
act=paddle.activation.Linear()) act=paddle.activation.Linear())
topk_end_pos_ids = paddle.layer.kmax_sequence_score( topk_end_pos_ids = paddle.layer.kmax_sequence_score(
input=end_pos_scores, beam_size=config.beam_size) input=end_pos_scores, beam_size=config.beam_size)
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
#coding=utf-8 #coding=utf-8
from __future__ import print_function from __future__ import print_function
import pdb
import os import os
import sys import sys
import logging import logging
...@@ -128,8 +127,12 @@ def build_event_handler(config, parameters, trainer, test_reader): ...@@ -128,8 +127,12 @@ def build_event_handler(config, parameters, trainer, test_reader):
if isinstance(event, paddle.event.EndIteration): if isinstance(event, paddle.event.EndIteration):
if event.batch_id and \ if event.batch_id and \
(not event.batch_id % config.checkpoint_period): (not event.batch_id % config.checkpoint_period):
# save_path = os.path.join(config.save_dir,
# "checkpoint_param.latest.tar.gz")
save_path = os.path.join(config.save_dir, save_path = os.path.join(config.save_dir,
"checkpoint_param.latest.tar.gz") "pass_%05d_%03d.tar.gz" %
(event.pass_id, event.batch_id))
save_model(save_path, parameters) save_model(save_path, parameters)
if event.batch_id and not event.batch_id % config.log_period: if event.batch_id and not event.batch_id % config.log_period:
...@@ -156,23 +159,27 @@ def train(model_config, trainer_config): ...@@ -156,23 +159,27 @@ def train(model_config, trainer_config):
if not os.path.exists(trainer_config.save_dir): if not os.path.exists(trainer_config.save_dir):
os.mkdir(trainer_config.save_dir) os.mkdir(trainer_config.save_dir)
paddle.init(use_gpu=True, trainer_count=4) paddle.init(
use_gpu=trainer_config.use_gpu,
trainer_count=trainer_config.trainer_count)
# define the optimizer # define the optimizer
optimizer = paddle.optimizer.Adam( optimizer = paddle.optimizer.Adam(
learning_rate=trainer_config.learning_rate, learning_rate=trainer_config.learning_rate,
gradient_clipping_threshold=50,
regularization=paddle.optimizer.L2Regularization(rate=5e-4), regularization=paddle.optimizer.L2Regularization(rate=5e-4),
model_average=paddle.optimizer.ModelAverage(average_window=0.5)) model_average=paddle.optimizer.ModelAverage(
average_window=0.5, max_average_window=1000))
# define network topology # define network topology
loss = GNR(model_config) loss = GNR(model_config)
parameters = paddle.parameters.create(loss) parameters = paddle.parameters.create(loss)
show_parameter_init_info(parameters)
if trainer_config.init_model_path: if trainer_config.init_model_path:
load_initial_model(trainer_config.init_model_path, parameters) load_initial_model(trainer_config.init_model_path, parameters)
else: else:
show_parameter_init_info(parameters)
# load the pre-trained embeddings # load the pre-trained embeddings
parameters.set("GloveVectors", parameters.set("GloveVectors",
load_pretrained_parameters( load_pretrained_parameters(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册