提交 388ae6ed 编写于 作者: G guosheng

Fix Transformer in 1.0

上级 02224d03
......@@ -171,7 +171,7 @@ def fast_infer(test_data, trg_idx2word, use_wordpiece):
])
# This is used here to set dropout to the test mode.
infer_program = fluid.default_main_program().inference_optimize()
infer_program = fluid.default_main_program().clone(for_test=True)
for batch_id, data in enumerate(test_data.batch_generator()):
data_input = prepare_batch_input(
......
......@@ -428,7 +428,7 @@ def train_loop(exe, train_prog, startup_prog, dev_count, sum_cost, avg_cost,
# Since the token number differs among devices, customize the gradient scale to
# use the token-averaged cost across multiple devices; the gradient scale is
# `1 / token_number` for the average cost.
build_strategy.gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.Customized
# build_strategy.gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.Customized
train_exe = fluid.ParallelExecutor(
use_cuda=TrainTaskConfig.use_gpu,
loss_name=avg_cost.name,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册