diff --git a/fluid/neural_machine_translation/transformer/model.py b/fluid/neural_machine_translation/transformer/model.py
index 5c11a5a3acb77dc207d35a78029bdafcfced5be8..bd83d758a757c4c6d8047bd4ce9456e64bb831e9 100644
--- a/fluid/neural_machine_translation/transformer/model.py
+++ b/fluid/neural_machine_translation/transformer/model.py
@@ -522,13 +522,13 @@ def wrap_decoder(trg_vocab_size,
             x=dec_output,
             y=fluid.get_var(word_emb_param_names[0]),
             transpose_y=True)
-        predict = layers.softmax(predict)
     else:
         predict = layers.fc(input=dec_output,
                             size=trg_vocab_size,
                             bias_attr=False,
-                            num_flatten_dims=2,
-                            act='softmax')
+                            num_flatten_dims=2)
+    if dec_inputs is None:
+        predict = layers.softmax(predict)
     return predict
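
For reference, here is a sketch of how the tail of wrap_decoder reads once this hunk is applied. The enclosing "if weight_sharing:" condition is assumed from the surrounding file (the hunk only shows its body), and the rationale in the comments, that training consumes raw logits while only the standalone inference program applies softmax, is an inference from the change, not text from the diff:

    # Post-patch tail of wrap_decoder (sketch; the `if weight_sharing:`
    # condition is assumed from context, as this hunk does not show it).
    if weight_sharing:
        # Reuse the target word embedding matrix as the output projection.
        predict = layers.matmul(
            x=dec_output,
            y=fluid.get_var(word_emb_param_names[0]),
            transpose_y=True)
    else:
        # Plain linear projection to vocabulary-sized logits; the fused
        # act='softmax' activation is removed by this patch.
        predict = layers.fc(input=dec_output,
                            size=trg_vocab_size,
                            bias_attr=False,
                            num_flatten_dims=2)
    # Softmax is now applied in one place, and only when dec_inputs is None
    # (i.e., in the inference program); the training path keeps raw logits.
    if dec_inputs is None:
        predict = layers.softmax(predict)
    return predict

Before the patch, the weight-sharing branch applied softmax unconditionally and the fc branch fused it via act='softmax'; after the patch, both branches produce logits and share a single conditional softmax.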