Commit d67cd35f authored by Yu Yang

Use AvgCost instead of customized loss

Parent 5efb3d3d
@@ -363,15 +363,10 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
         count=dev_count if args.use_token_batch else 1)
     build_strategy = fluid.BuildStrategy()
-    # Since the token number differs among devices, customize gradient scale to
-    # use token average cost among multi-devices. and the gradient scale is
-    # `1 / token_number` for average cost.
-    build_strategy.gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.Customized
     train_exe = fluid.ParallelExecutor(
         use_cuda=TrainTaskConfig.use_gpu,
-        loss_name=sum_cost.name,
-        main_program=train_progm,
-        build_strategy=build_strategy)
+        loss_name=avg_cost.name,
+        main_program=train_progm)
     data_input_names = encoder_data_input_fields + decoder_data_input_fields[:
         -1] + label_data_input_fields
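Why the first hunk is safe: sum_cost is the batch's total token cost, so under multi-device training its gradients had to be rescaled by 1 / token_number by hand, which is exactly what the removed Customized gradient-scale strategy supplied. avg_cost already carries that normalization, so ParallelExecutor's default gradient scaling is correct for it and the BuildStrategy customization can go. Below is a minimal sketch of how the two costs are assumed to relate; the names weighted_cost and weights are illustrative stand-ins, since the real definitions live in the model code, not in this diff.

import paddle.fluid as fluid

# Illustrative stand-ins: `weighted_cost` is the per-token cross entropy
# and `weights` marks real (non-padding) target tokens; the transformer
# model defines the real versions of these outside this diff.
weighted_cost = fluid.layers.data(
    name='weighted_cost', shape=[1], dtype='float32')
weights = fluid.layers.data(name='weights', shape=[1], dtype='float32')

sum_cost = fluid.layers.reduce_sum(weighted_cost)  # total cost of the batch
token_num = fluid.layers.reduce_sum(weights)       # number of real tokens
# avg_cost bakes the 1 / token_number factor into the loss itself, so no
# Customized gradient scale strategy is needed when minimizing it:
avg_cost = sum_cost / token_num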
@@ -482,10 +477,10 @@ def train(args):
             beta1=TrainTaskConfig.beta1,
             beta2=TrainTaskConfig.beta2,
             epsilon=TrainTaskConfig.eps)
-        optimizer.minimize(sum_cost)
+        optimizer.minimize(avg_cost)
     elif args.sync == False:
         optimizer = fluid.optimizer.SGD(0.003)
-        optimizer.minimize(sum_cost)
+        optimizer.minimize(avg_cost)
     else:
         lr_decay = fluid.layers\
             .learning_rate_scheduler\
@@ -497,7 +492,7 @@ def train(args):
             beta1=TrainTaskConfig.beta1,
             beta2=TrainTaskConfig.beta2,
             epsilon=TrainTaskConfig.eps)
-        optimizer.minimize(sum_cost)
+        optimizer.minimize(avg_cost)
     if args.local:
         print("local start_up:")
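The optimizer hunks are the single-device counterpart of the same change: every branch now calls optimizer.minimize(avg_cost), which puts the 1 / token_number factor into the loss itself rather than into a gradient-scale strategy. A quick numerical check of that equivalence, in plain NumPy with illustrative names only:

import numpy as np

rng = np.random.default_rng(0)
token_num = 5
cost = rng.random(token_num)          # per-token costs for one toy batch

# Gradient of sum(cost) w.r.t. each cost entry is 1; the removed
# Customized strategy rescaled it by 1 / token_num after the fact.
grad_sum_rescaled = np.ones(token_num) / token_num

# Gradient of mean(cost) w.r.t. each entry is 1 / token_num directly,
# which is what optimizer.minimize(avg_cost) propagates on its own.
grad_avg = np.full(token_num, 1.0 / token_num)

assert np.allclose(grad_sum_rescaled, grad_avg)
print("sum-cost with 1/token_num rescale == avg-cost gradient")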