From 3e9fccea938e214b4f19f0a2971be5d49f3ecafb Mon Sep 17 00:00:00 2001
From: guosheng
Date: Fri, 8 Jun 2018 14:09:17 +0800
Subject: [PATCH] Align the outputs of fast_infer with the original Python
 infer in Transformer

---
 .../transformer/config.py |  6 +--
 .../transformer/infer.py  | 22 ++++++++---
 .../transformer/model.py  | 37 +++++++++++--------
 .../transformer/train.py  |  2 +
 4 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/fluid/neural_machine_translation/transformer/config.py b/fluid/neural_machine_translation/transformer/config.py
index ae24406e..b9b4b964 100644
--- a/fluid/neural_machine_translation/transformer/config.py
+++ b/fluid/neural_machine_translation/transformer/config.py
@@ -42,9 +42,9 @@ class InferTaskConfig(object):
     # the number of decoded sentences to output.
     n_best = 1
     # the flags indicating whether to output the special tokens.
-    output_bos = False
-    output_eos = False
-    output_unk = False
+    output_bos = True  #False
+    output_eos = True  #False
+    output_unk = True  #False
 
     # the directory for loading the trained model.
     model_path = "trained_models/pass_1.infer.model"
diff --git a/fluid/neural_machine_translation/transformer/infer.py b/fluid/neural_machine_translation/transformer/infer.py
index e054c2b0..b72e84af 100644
--- a/fluid/neural_machine_translation/transformer/infer.py
+++ b/fluid/neural_machine_translation/transformer/infer.py
@@ -275,11 +275,11 @@ def translate_batch(exe,
             top_k_indice = np.argpartition(predict, -beam_size)[-beam_size:]
             top_scores_ids = top_k_indice[np.argsort(predict[top_k_indice])[::
                                                                             -1]]
-            top_scores_ids = np.asarray(
-                sorted(
-                    top_scores_ids,
-                    lambda x, y: x / predict_all.shape[-1] - y / predict_all.shape[-1]
-                ))  # sort by pre_branch and score to compare with fast_infer
+            # top_scores_ids = np.asarray(
+            #     sorted(
+            #         top_scores_ids,
+            #         lambda x, y: x / predict_all.shape[-1] - y / predict_all.shape[-1]
+            #     ))  # sort by pre_branch and score to compare with fast_infer
             top_scores = predict[top_scores_ids]
             scores[beam_idx] = top_scores
             prev_branchs[beam_idx].append(top_scores_ids /
@@ -368,6 +368,7 @@ def infer(args):
         start_mark=args.special_token[0],
         end_mark=args.special_token[1],
         unk_mark=args.special_token[2],
+        max_length=ModelHyperParams.max_length,
         clip_last_batch=False)
 
     trg_idx2word = test_data.load_dict(
@@ -394,6 +395,8 @@ def infer(args):
             seq)
 
     for batch_id, data in enumerate(test_data.batch_generator()):
+        if batch_id != 0:
+            continue
         batch_seqs, batch_scores = translate_batch(
             exe,
             [item[0] for item in data],
@@ -422,6 +425,8 @@ def infer(args):
             scores = batch_scores[i]
             for seq in seqs:
                 print(" ".join([trg_idx2word[idx] for idx in seq]))
+            print scores
+            exit(0)
 
 
 def prepare_batch_input(insts, data_input_names, util_input_names, src_pad_idx,
@@ -522,12 +527,15 @@ def fast_infer(args):
         start_mark=args.special_token[0],
         end_mark=args.special_token[1],
         unk_mark=args.special_token[2],
+        max_length=ModelHyperParams.max_length,
         clip_last_batch=False)
 
     trg_idx2word = test_data.load_dict(
         dict_path=args.trg_vocab_fpath, reverse=True)
 
     for batch_id, data in enumerate(test_data.batch_generator()):
+        if batch_id != 0:
+            continue
         data_input = prepare_batch_input(
             data, encoder_data_input_fields + fast_decoder_data_input_fields,
             encoder_util_input_fields + fast_decoder_util_input_fields,
@@ -540,6 +548,7 @@ def fast_infer(args):
         # print np.array(seq_ids)#, np.array(seq_scores)
         # print seq_ids.lod()#, seq_scores.lod()
         hyps = [[] for i in range(len(data))]
+        scores = [[] for i in range(len(data))]
         for i in range(len(seq_ids.lod()[0]) - 1):  # for each source sentence
             start = seq_ids.lod()[0][i]
             end = seq_ids.lod()[0][i + 1]
@@ -550,8 +559,11 @@ def fast_infer(args):
                     trg_idx2word[idx]
                     for idx in np.array(seq_ids)[sub_start:sub_end]
                 ]))
+                scores[i].append(np.array(seq_scores)[sub_end - 1])
             print hyps[i]
+            print scores[i]
             print len(hyps[i]), [len(hyp.split()) for hyp in hyps[i]]
+            exit(0)
 
 
 if __name__ == "__main__":
diff --git a/fluid/neural_machine_translation/transformer/model.py b/fluid/neural_machine_translation/transformer/model.py
index 9cbdf582..0e109a88 100644
--- a/fluid/neural_machine_translation/transformer/model.py
+++ b/fluid/neural_machine_translation/transformer/model.py
@@ -123,15 +123,15 @@ def multi_head_attention(queries,
             act="softmax")
         weights = layers.reshape(
             x=weights, shape=product.shape, actual_shape=post_softmax_shape)
-        global FLAG
-        if FLAG:
-            print "hehehehehe"
-            layers.Print(scaled_q)
-            layers.Print(k)
-            layers.Print(v)
-            layers.Print(product)
-            layers.Print(weights)
-            FLAG = False
+        # global FLAG
+        # if FLAG:
+        #     print "hehehehehe"
+        #     layers.Print(scaled_q)
+        #     layers.Print(k)
+        #     layers.Print(v)
+        #     layers.Print(product)
+        #     layers.Print(weights)
+        #     FLAG = False
         if dropout_rate:
             weights = layers.dropout(
                 weights, dropout_prob=dropout_rate, is_test=False)
@@ -694,7 +694,7 @@ def fast_decode(
                     src_attn_pre_softmax_shape, src_attn_post_softmax_shape),
                 enc_output=pre_enc_output,
                 caches=pre_caches)
-            layers.Print(logits)
+            # layers.Print(logits)
             topk_scores, topk_indices = layers.topk(logits, k=beam_size)
             # layers.Print(topk_scores)
             # layers.Print(topk_indices)
@@ -708,6 +708,7 @@ def fast_decode(
             topk_indices = layers.lod_reset(topk_indices, pre_ids)
             selected_ids, selected_scores = layers.beam_search(
                 pre_ids=pre_ids,
+                pre_scores=pre_scores,
                 ids=topk_indices,
                 scores=accu_scores,
                 beam_size=beam_size,
@@ -735,12 +736,16 @@ def fast_decode(
                     y=attn_post_softmax_shape_delta),
                 slf_attn_post_softmax_shape)
 
-            max_len_cond = layers.less_than(x=step_idx, y=max_len)
-            all_finish_cond = layers.less_than(x=step_idx, y=max_len)
-            layers.logical_or(x=max_len_cond, y=all_finish_cond, out=cond)
-
-        finished_ids, finished_scores = layers.beam_search_decode(ids, scores,
-                                                                  eos_idx)
+            length_cond = layers.less_than(x=step_idx, y=max_len)
+            finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
+            # layers.Print(length_cond)
+            # layers.Print(finish_cond)
+            layers.logical_and(x=length_cond, y=finish_cond, out=cond)
+            layers.Print(step_idx)
+        # finished_ids, finished_scores = layers.beam_search_decode(ids, scores,
+        #                                                           eos_idx)
+        finished_ids, finished_scores = layers.beam_search_decode(
+            ids, scores, beam_size=beam_size, end_id=eos_idx)
         return finished_ids, finished_scores
 
     finished_ids, finished_scores = beam_search()
diff --git a/fluid/neural_machine_translation/transformer/train.py b/fluid/neural_machine_translation/transformer/train.py
index d097a079..477b61f7 100644
--- a/fluid/neural_machine_translation/transformer/train.py
+++ b/fluid/neural_machine_translation/transformer/train.py
@@ -288,6 +288,7 @@ def train(args):
         start_mark=args.special_token[0],
         end_mark=args.special_token[1],
         unk_mark=args.special_token[2],
+        max_length=ModelHyperParams.max_length,
         clip_last_batch=False)
 
     train_data = read_multiple(reader=train_data.batch_generator)
@@ -315,6 +316,7 @@ def train(args):
         start_mark=args.special_token[0],
         end_mark=args.special_token[1],
         unk_mark=args.special_token[2],
+        max_length=ModelHyperParams.max_length,
         clip_last_batch=False,
         shuffle=False,
         shuffle_batch=False)
-- 
GitLab
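
Note: the unpacking logic that the fast_infer hunk adds can be hard to read inside a
diff. Below is a minimal, self-contained sketch (not part of the patch) of how the
two-level LoD result of beam-search decoding is turned into per-sentence n-best lists
and scores. The lod, seq_ids, seq_scores, and trg_idx2word values are made-up toy data
standing in for the real tensors returned by layers.beam_search_decode; the sketch is
plain Python 3 with numpy.

    import numpy as np

    # Toy stand-ins for the two-level LoD output of beam-search decoding:
    # lod[0] delimits source sentences, lod[1] delimits the candidate
    # sequences of each sentence. All values are illustrative only.
    lod = [[0, 2, 4], [0, 3, 7, 10, 12]]   # 2 sentences, 2 candidates each
    seq_ids = np.array([1, 5, 2, 1, 6, 7, 2, 1, 8, 2, 1, 2])
    seq_scores = np.array(
        [0., -0.1, -0.5, 0., -0.2, -0.6, -0.9, 0., -0.3, -0.7, 0., -1.2])
    trg_idx2word = {1: "<s>", 2: "<e>", 5: "hello", 6: "hi", 7: "there", 8: "hey"}

    hyps = [[] for _ in range(len(lod[0]) - 1)]
    scores = [[] for _ in range(len(lod[0]) - 1)]
    for i in range(len(lod[0]) - 1):        # for each source sentence
        start, end = lod[0][i], lod[0][i + 1]
        for j in range(end - start):        # for each candidate of sentence i
            sub_start = lod[1][start + j]
            sub_end = lod[1][start + j + 1]
            hyps[i].append(" ".join(
                trg_idx2word[idx] for idx in seq_ids[sub_start:sub_end]))
            # scores accumulate per decoding step, so the last position of a
            # candidate holds its final (total) score, hence sub_end - 1
            scores[i].append(seq_scores[sub_end - 1])
        print(hyps[i])
        print(scores[i])

Indexing seq_scores at sub_end - 1 mirrors what the patch does in fast_infer: any
earlier index would yield the score of an unfinished prefix rather than the score of
the completed candidate.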