Commit 431f46fb authored by caoying03

fix infer.

Parent f0a11911
#!/usr/bin/env python
#coding=utf-8
import pdb
import collections
import paddle.v2 as paddle
......
#!/usr/bin/env python
#coding=utf-8
import pdb
import numpy as np
__all__ = ["BeamDecoding"]
class BeamDecoding(object):
def __init__(self, documents, sentence_scores, selected_sentences,
start_scores, selected_starts, end_scores, selected_ends):
self.documents = documents
self.sentence_scores = sentence_scores
self.selected_sentences = selected_sentences
self.start_scores = start_scores
self.selected_starts = selected_starts
self.end_scores = end_scores
self.selected_ends = selected_ends
        # Sequence start-position information for the three-step beam search:
        # beam 1 searches the index of the answer sentence,
        self.beam1_seq_start_positions = []
        # beam 2 searches the start position of the answer span,
        self.beam2_seq_start_positions = []
        # beam 3 searches the end position of the answer span.
        self.beam3_seq_start_positions = []
self.ans_per_sample_in_a_batch = [0]
self.all_searched_ans = []
self.final_ans = [[] for i in range(len(documents))]
def _build_beam1_seq_info(self):
self.beam1_seq_start_positions.append([0])
for idx, one_doc in enumerate(self.documents):
for sentence in one_doc:
self.beam1_seq_start_positions[-1].append(
self.beam1_seq_start_positions[-1][-1] + len(sentence))
if len(self.beam1_seq_start_positions) != len(self.documents):
self.beam1_seq_start_positions.append(
[self.beam1_seq_start_positions[-1][-1]])
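        # Illustrative example (hypothetical batch): for two documents whose
        # sentence lengths are [3, 4] and [5], beam1_seq_start_positions ends
        # up as [[0, 3, 7], [7, 12]], i.e. the batch-global word offset at
        # which each candidate sentence starts.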
def _build_beam2_seq_info(self):
seq_num, beam_size = self.selected_sentences.shape
self.beam2_seq_start_positions.append([0])
for i in range(seq_num):
for j in range(beam_size):
selected_id = int(self.selected_sentences[i][j])
if selected_id == -1: break
seq_len = self.beam1_seq_start_positions[i][
selected_id + 1] - self.beam1_seq_start_positions[i][
selected_id]
self.beam2_seq_start_positions[-1].append(
self.beam2_seq_start_positions[-1][-1] + seq_len)
if len(self.beam2_seq_start_positions) != seq_num:
self.beam2_seq_start_positions.append(
[self.beam2_seq_start_positions[-1][-1]])
def _build_beam3_seq_info(self):
seq_num_in_a_batch = len(self.documents)
seq_id = 0
sub_seq_id = 0
sub_seq_count = len(self.beam2_seq_start_positions[seq_id]) - 1
self.beam3_seq_start_positions.append([0])
sub_seq_num, beam_size = self.selected_starts.shape
for i in range(sub_seq_num):
seq_len = self.beam2_seq_start_positions[seq_id][
sub_seq_id + 1] - self.beam2_seq_start_positions[seq_id][
sub_seq_id]
for j in range(beam_size):
start_id = int(self.selected_starts[i][j])
if start_id == -1: break
self.beam3_seq_start_positions[-1].append(
self.beam3_seq_start_positions[-1][-1] + seq_len - start_id)
sub_seq_id += 1
if sub_seq_id == sub_seq_count:
if len(self.beam3_seq_start_positions) != seq_num_in_a_batch:
self.beam3_seq_start_positions.append(
[self.beam3_seq_start_positions[-1][-1]])
sub_seq_id = 0
seq_id += 1
sub_seq_count = len(self.beam2_seq_start_positions[
seq_id]) - 1
assert (
self.beam3_seq_start_positions[-1][-1] == self.end_scores.shape[0])
def _build_seq_info_for_each_beam(self):
self._build_beam1_seq_info()
self._build_beam2_seq_info()
self._build_beam3_seq_info()
def _cal_ans_per_sample_in_a_batch(self):
start_row = 0
for seq in self.beam3_seq_start_positions:
end_row = start_row + len(seq) - 1
ans_count = np.sum(self.selected_ends[start_row:end_row, :] != -1.)
self.ans_per_sample_in_a_batch.append(
self.ans_per_sample_in_a_batch[-1] + ans_count)
start_row = end_row
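        # After this pass, ans_per_sample_in_a_batch holds a running
        # (prefix-sum) count of candidate answers, e.g. [0, 5, 9] for a
        # hypothetical batch of two samples with 5 and 4 answers; the slice
        # boundaries for each sample are read from adjacent entries.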
    def _get_valid_selected_ids(self, mat):
flattened = []
height, width = mat.shape
for i in range(height):
for j in range(width):
if mat[i][j] == -1.: break
flattened.append([int(mat[i][j]), [i, j]])
return flattened
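        # Example (hypothetical input): for a beam matrix
        #     [[ 2.,  5., -1.],
        #      [ 0., -1., -1.]]
        # this returns the valid ids paired with their [row, col] positions:
        #     [[2, [0, 0]], [5, [0, 1]], [0, [1, 0]]]
        # since -1. marks an unused beam slot and ends the scan of that row.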
def decoding(self):
self._build_seq_info_for_each_beam()
self._cal_ans_per_sample_in_a_batch()
seq_id = 0
sub_seq_id = 0
sub_seq_count = len(self.beam3_seq_start_positions[seq_id]) - 1
sub_seq_num, beam_size = self.selected_ends.shape
for i in xrange(sub_seq_num):
seq_offset_in_batch = self.beam3_seq_start_positions[seq_id][
sub_seq_id]
for j in xrange(beam_size):
end_pos = int(self.selected_ends[i][j])
if end_pos == -1: break
self.all_searched_ans.append({
"score": self.end_scores[seq_offset_in_batch + end_pos],
"sentence_pos": -1,
"start_span_pos": -1,
"end_span_pos": end_pos,
"parent_ids_in_prev_beam": i
})
sub_seq_id += 1
if sub_seq_id == sub_seq_count:
seq_id += 1
if seq_id == len(self.beam3_seq_start_positions): break
sub_seq_id = 0
sub_seq_count = len(self.beam3_seq_start_positions[seq_id]) - 1
assert len(self.all_searched_ans) == self.ans_per_sample_in_a_batch[-1]
seq_id = 0
sub_seq_id = 0
sub_seq_count = len(self.beam2_seq_start_positions[seq_id]) - 1
last_row_id = None
        starts = self._get_valid_selected_ids(self.selected_starts)
for i, ans in enumerate(self.all_searched_ans):
ans["start_span_pos"] = starts[ans["parent_ids_in_prev_beam"]][0]
seq_offset_in_batch = (
self.beam2_seq_start_positions[seq_id][sub_seq_id])
ans["score"] += self.start_scores[(
seq_offset_in_batch + ans["start_span_pos"])]
ans["parent_ids_in_prev_beam"] = starts[ans[
"parent_ids_in_prev_beam"]][1][0]
if last_row_id and last_row_id != ans["parent_ids_in_prev_beam"]:
sub_seq_id += 1
if sub_seq_id == sub_seq_count:
seq_id += 1
if seq_id == len(self.beam2_seq_start_positions): break
sub_seq_count = len(self.beam2_seq_start_positions[seq_id]) - 1
sub_seq_id = 0
last_row_id = ans["parent_ids_in_prev_beam"]
offset_info = [0]
for sen in self.beam1_seq_start_positions[:-1]:
offset_info.append(offset_info[-1] + len(sen) - 1)
        sen_ids = self._get_valid_selected_ids(self.selected_sentences)
for ans in self.all_searched_ans:
ans["sentence_pos"] = sen_ids[ans["parent_ids_in_prev_beam"]][0]
row_id = ans["parent_ids_in_prev_beam"] / beam_size
offset = offset_info[row_id - 1] if row_id else 0
ans["score"] += self.sentence_scores[offset + ans["sentence_pos"]]
for i in range(len(self.ans_per_sample_in_a_batch) - 1):
start_pos = self.ans_per_sample_in_a_batch[i]
end_pos = self.ans_per_sample_in_a_batch[i + 1]
for ans in sorted(
self.all_searched_ans[start_pos:end_pos],
key=lambda x: x["score"],
reverse=True):
self.final_ans[i].append({
"score": ans["score"],
"label": [
ans["sentence_pos"], ans["start_span_pos"],
ans["end_span_pos"]
]
})
return self.final_ans
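# A minimal usage sketch (assumed names, mirroring infer.py in this commit):
# `outs` is the list of inference outputs, assumed to be ordered as
# sentence_scores, selected_sentences, start_scores, selected_starts,
# end_scores, selected_ends.
#
#   decoder = BeamDecoding([sample[1] for sample in test_batch], *outs)
#   top_answers = decoder.decoding()  # per-sample answers sorted by score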
......@@ -27,10 +27,13 @@ class TrainerConfig(object):
data_dir = "data/featurized"
save_dir = "models"
train_batch_size = 4 * 10
test_batch_size = 1
use_gpu = True
trainer_count = 4
train_batch_size = trainer_count * 10
epochs = 100
test_batch_size = 4
epochs = 20
    # For debug printing; if set to 0, no parameter status is printed.
show_parameter_status_period = 0
......
......@@ -5,7 +5,6 @@ import sys
import gzip
import logging
import numpy as np
import pdb
import paddle.v2 as paddle
from paddle.v2.layer import parse_network
......@@ -14,6 +13,7 @@ import reader
from model import GNR
from train import choose_samples
from config import ModelConfig, TrainerConfig
from beam_decoding import BeamDecoding
logger = logging.getLogger("paddle")
logger.setLevel(logging.INFO)
......@@ -27,67 +27,44 @@ def load_reverse_dict(dict_file):
return word_dict
def parse_one_sample(raw_input_doc, sub_sen_scores, selected_sentence,
start_span_scores, selected_starts, end_span_scores,
selected_ends):
assert len(raw_input_doc) == sub_sen_scores.shape[0]
beam_size = selected_sentence.shape[1]
all_searched_ans = []
for i in xrange(selected_ends.shape[0]):
for j in xrange(selected_ends.shape[1]):
if selected_ends[i][j] == -1.: break
all_searched_ans.append({
'score': end_span_scores[int(selected_ends[i][j])],
'sentence_pos': -1,
'start_span_pos': -1,
'end_span_pos': int(selected_ends[i][j]),
'parent_ids_in_prev_beam': i
})
for path in all_searched_ans:
row_id = path['parent_ids_in_prev_beam'] / beam_size
col_id = path['parent_ids_in_prev_beam'] % beam_size
path['start_span_pos'] = int(selected_starts[row_id][col_id])
path['score'] += start_span_scores[path['start_span_pos']]
path['parent_ids_in_prev_beam'] = row_id
for path in all_searched_ans:
row_id = path['parent_ids_in_prev_beam'] / beam_size
col_id = path['parent_ids_in_prev_beam'] % beam_size
path['sentence_pos'] = int(selected_sentence[row_id][col_id])
path['score'] += sub_sen_scores[path['sentence_pos']]
all_searched_ans.sort(key=lambda x: x['score'], reverse=True)
return all_searched_ans
def print_result(test_batch, predicted_ans, ids_2_word, print_top_k=1):
for i, sample in enumerate(test_batch):
query_words = [ids_2_word[ids] for ids in sample[0]]
print("query:\t%s" % (" ".join(query_words)))
print("documents:")
for j, sen in enumerate(sample[1]):
sen_words = [ids_2_word[ids] for ids in sen]
start = sample[4]
end = sample[4] + sample[5] + 1
print("%d\t%s" % (j, " ".join(sen_words)))
print("gold:\t[%d %d %d] %s" % (
sample[3], sample[4], sample[5], " ".join(
[ids_2_word[ids] for ids in sample[1][sample[3]][start:end]])))
print("predicted:")
for k in range(print_top_k):
label = predicted_ans[i][k]["label"]
start = label[1]
end = label[1] + label[2] + 1
ans_words = [
ids_2_word[ids] for ids in sample[1][label[0]][start:end]
]
print("%.4f\t[%d %d %d] %s" %
(predicted_ans[i][k]["score"], label[0], label[1], label[2],
" ".join(ans_words)))
print("\n")
def infer_a_batch(inferer, test_batch, ids_2_word, out_layer_count):
outs = inferer.infer(input=test_batch, flatten_result=False, field="value")
for test_sample in test_batch:
query_word = [ids_2_word[ids] for ids in test_sample[0]]
print("query\n\t%s\ndocument" % (" ".join(query_word)))
        # iterate over each sentence in the document
for i, sentence in enumerate(test_sample[1]):
sen_word = [ids_2_word[ids] for ids in sentence]
print("%d\t%s" % (i, " ".join(sen_word)))
print("gold\t[%d %d %d]" %
(test_sample[3], test_sample[4], test_sample[5]))
ans = parse_one_sample(test_sample[1], *outs)[0]
ans_ids = test_sample[1][ans['sentence_pos']][ans['start_span_pos']:ans[
'start_span_pos'] + ans['end_span_pos']]
ans_str = " ".join([ids_2_word[ids] for ids in ans_ids])
print("searched answer\t[%d %d %d]\n\t%s" %
(ans['sentence_pos'], ans['start_span_pos'], ans['end_span_pos'],
ans_str))
decoder = BeamDecoding([sample[1] for sample in test_batch], *outs)
print_result(test_batch, decoder.decoding(), ids_2_word, print_top_k=10)
def infer(model_path, data_dir, test_batch_size, config):
assert os.path.exists(model_path), "The model does not exist."
paddle.init(use_gpu=False, trainer_count=1)
paddle.init(use_gpu=True, trainer_count=1)
ids_2_word = load_reverse_dict(config.dict_path)
......@@ -96,6 +73,8 @@ def infer(model_path, data_dir, test_batch_size, config):
# load the trained models
parameters = paddle.parameters.Parameters.from_tar(
gzip.open(model_path, "r"))
logger.info("loading parameter is done.")
inferer = paddle.inference.Inference(
output_layer=outputs, parameters=parameters)
......@@ -115,5 +94,6 @@ def infer(model_path, data_dir, test_batch_size, config):
if __name__ == "__main__":
infer("models/pass_00003.tar.gz", TrainerConfig.data_dir,
# infer("models/round1/pass_00000.tar.gz", TrainerConfig.data_dir,
infer("models/round2_on_cpu/pass_00000.tar.gz", TrainerConfig.data_dir,
TrainerConfig.test_batch_size, ModelConfig)
#!/usr/bin/env python
#coding=utf-8
import pdb
import paddle.v2 as paddle
from paddle.v2.layer import parse_network
import basic_modules
......@@ -35,6 +33,7 @@ def encode_question(input_embedding, config, prefix):
act=paddle.activation.Linear())
weights = paddle.layer.fc(input=lstm_outs,
size=1,
bias_attr=False,
act=paddle.activation.SequenceSoftmax())
weighted_candidates = paddle.layer.scaling(input=candidates, weight=weights)
passage_indep_embedding = paddle.layer.pooling(
......@@ -63,7 +62,12 @@ def question_aligned_passage_embedding(question_lstm_outs, document_embeddings,
weights = paddle.layer.fc(
input=[question_lstm_outs, doc_word_expand],
param_attr=[
paddle.attr.Param(initial_std=1e-3),
paddle.attr.Param(initial_std=1e-3)
],
size=1,
bias_attr=False,
act=paddle.activation.SequenceSoftmax())
weighted_candidates = paddle.layer.scaling(
input=question_outs_proj, weight=weights)
......@@ -111,20 +115,26 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config,
is_infer):
last_state_of_sentence = paddle.layer.last_seq(
input=doc_lstm_outs, agg_level=paddle.layer.AggregateLevel.TO_SEQUENCE)
sentence_scores = paddle.layer.fc(input=last_state_of_sentence,
size=1,
bias_attr=False,
act=paddle.activation.Linear())
sentence_scores = paddle.layer.fc(
input=last_state_of_sentence,
size=1,
bias_attr=False,
param_attr=paddle.attr.Param(initial_std=1e-3),
act=paddle.activation.Linear())
topk_sentence_ids = paddle.layer.kmax_sequence_score(
input=sentence_scores, beam_size=config.beam_size)
topk_sen = paddle.layer.sub_nested_seq(
input=doc_lstm_outs, selected_indices=topk_sentence_ids)
# expand beam to search start positions on selected sentences
start_pos_scores = paddle.layer.fc(input=topk_sen,
size=1,
bias_attr=False,
act=paddle.activation.Linear())
start_pos_scores = paddle.layer.fc(
input=topk_sen,
size=1,
layer_attr=paddle.attr.ExtraLayerAttribute(
error_clipping_threshold=10.0),
bias_attr=False,
param_attr=paddle.attr.Param(initial_std=1e-3),
act=paddle.activation.Linear())
topk_start_pos_ids = paddle.layer.kmax_sequence_score(
input=start_pos_scores, beam_size=config.beam_size)
topk_start_spans = paddle.layer.seq_slice(
......@@ -137,10 +147,12 @@ def search_answer(doc_lstm_outs, sentence_idx, start_idx, end_idx, config,
depth=config.lstm_depth,
drop_rate=config.lstm_hidden_droprate,
prefix="__end_span_embeddings__")
end_pos_scores = paddle.layer.fc(input=end_span_embedding,
size=1,
bias_attr=False,
act=paddle.activation.Linear())
end_pos_scores = paddle.layer.fc(
input=end_span_embedding,
size=1,
bias_attr=False,
param_attr=paddle.attr.Param(initial_std=1e-3),
act=paddle.activation.Linear())
topk_end_pos_ids = paddle.layer.kmax_sequence_score(
input=end_pos_scores, beam_size=config.beam_size)
......
......@@ -2,7 +2,6 @@
#coding=utf-8
from __future__ import print_function
import pdb
import os
import sys
import logging
......@@ -128,8 +127,12 @@ def build_event_handler(config, parameters, trainer, test_reader):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id and \
(not event.batch_id % config.checkpoint_period):
# save_path = os.path.join(config.save_dir,
# "checkpoint_param.latest.tar.gz")
save_path = os.path.join(config.save_dir,
"checkpoint_param.latest.tar.gz")
"pass_%05d_%03d.tar.gz" %
(event.pass_id, event.batch_id))
save_model(save_path, parameters)
if event.batch_id and not event.batch_id % config.log_period:
......@@ -156,23 +159,27 @@ def train(model_config, trainer_config):
if not os.path.exists(trainer_config.save_dir):
os.mkdir(trainer_config.save_dir)
paddle.init(use_gpu=True, trainer_count=4)
paddle.init(
use_gpu=trainer_config.use_gpu,
trainer_count=trainer_config.trainer_count)
# define the optimizer
optimizer = paddle.optimizer.Adam(
learning_rate=trainer_config.learning_rate,
gradient_clipping_threshold=50,
regularization=paddle.optimizer.L2Regularization(rate=5e-4),
model_average=paddle.optimizer.ModelAverage(average_window=0.5))
model_average=paddle.optimizer.ModelAverage(
average_window=0.5, max_average_window=1000))
# define network topology
loss = GNR(model_config)
parameters = paddle.parameters.create(loss)
show_parameter_init_info(parameters)
if trainer_config.init_model_path:
load_initial_model(trainer_config.init_model_path, parameters)
else:
show_parameter_init_info(parameters)
# load the pre-trained embeddings
parameters.set("GloveVectors",
load_pretrained_parameters(
......