diff --git a/fluid/neural_machine_translation/transformer/config.py b/fluid/neural_machine_translation/transformer/config.py
index a4e588c620f21c4f38eb1906f55d68ddf93214b6..e68ab17e69eff890cb8e6b028ead5e6163213761 100644
--- a/fluid/neural_machine_translation/transformer/config.py
+++ b/fluid/neural_machine_translation/transformer/config.py
@@ -62,10 +62,8 @@ class ModelHyperParams(object):
     eos_idx = 1
     # index for <unk> token
     unk_idx = 2
-    # max length of sequences.
-    # The size of position encoding table should at least plus 1, since the
-    # sinusoid position encoding starts from 1 and 0 can be used as the padding
-    # token for position encoding.
+    # Max length of sequences, which decides the size of the position encoding
+    # table. Positions start from 1, and the start and end tokens are counted in.
     max_length = 256
     # the dimension for word embeddings, which is also the last dimension of
     # the input and output of multi-head attention, position-wise feed-forward
diff --git a/fluid/neural_machine_translation/transformer/infer.py b/fluid/neural_machine_translation/transformer/infer.py
index 874028081cca218ae16559af9ea9b05d3494c977..505bf0b0062bda27a0299ed7d844e2f05abd95b8 100644
--- a/fluid/neural_machine_translation/transformer/infer.py
+++ b/fluid/neural_machine_translation/transformer/infer.py
@@ -543,7 +543,8 @@ def infer(args, inferencer=fast_infer):
         start_mark=args.special_token[0],
         end_mark=args.special_token[1],
         unk_mark=args.special_token[2],
-        max_length=ModelHyperParams.max_length,
+        # leave room for the start and end tokens
+        max_length=ModelHyperParams.max_length - 2,
         clip_last_batch=False)
     trg_idx2word = test_data.load_dict(
         dict_path=args.trg_vocab_fpath, reverse=True)
diff --git a/fluid/neural_machine_translation/transformer/train.py b/fluid/neural_machine_translation/transformer/train.py
index e3c9b62d068b7cbf0433328d1fcb559a4e659166..cdd7dfed8235a42da867e08e16e0aef4ba500fa1 100644
--- a/fluid/neural_machine_translation/transformer/train.py
+++ b/fluid/neural_machine_translation/transformer/train.py
@@ -290,7 +290,8 @@ def train(args):
         start_mark=args.special_token[0],
         end_mark=args.special_token[1],
         unk_mark=args.special_token[2],
-        max_length=ModelHyperParams.max_length,
+        # leave room for the start and end tokens
+        max_length=ModelHyperParams.max_length - 2,
         clip_last_batch=False)
     train_data = read_multiple(
         reader=train_data.batch_generator,
@@ -326,7 +327,8 @@ def train(args):
         start_mark=args.special_token[0],
         end_mark=args.special_token[1],
         unk_mark=args.special_token[2],
-        max_length=ModelHyperParams.max_length,
+        # leave room for the start and end tokens
+        max_length=ModelHyperParams.max_length - 2,
        clip_last_batch=False,
        shuffle=False,
        shuffle_batch=False)
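
Note on the `- 2`: the reader's cap applies to raw sequences before the start and end marks are attached, while `ModelHyperParams.max_length` (the position encoding table size) counts those marks in. A minimal standalone sketch of this relationship, assuming the reader adds exactly one start mark and one end mark per sequence (the names below are illustrative, not the repository's actual DataReader API):

    # Hypothetical sketch, not the repository's reader implementation.
    MAX_LENGTH = 256               # ModelHyperParams.max_length (position table size)
    READER_CAP = MAX_LENGTH - 2    # cap passed to the data reader

    def attach_marks(tokens, start_mark="<s>", end_mark="<e>"):
        """Wrap a raw token sequence with the start and end marks."""
        return [start_mark] + list(tokens) + [end_mark]

    raw = ["w"] * READER_CAP           # longest raw sequence the reader keeps
    full = attach_marks(raw)
    assert len(full) == MAX_LENGTH     # exactly fits the position encoding table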