未验证 提交 67f0fe7a 编写于 作者: C chengduo 提交者: GitHub

Update transformer (#2444)

* update transformer
上级 3370e5f6
......@@ -376,7 +376,8 @@ def py_reader_provider_wrapper(data_reader, place):
data, data_input_names, ModelHyperParams.eos_idx,
ModelHyperParams.eos_idx, ModelHyperParams.n_head,
ModelHyperParams.d_model)
yield [data_input_dict[item] for item in data_input_names]
total_dict = dict(data_input_dict.items())
yield [total_dict[item] for item in data_input_names]
return py_reader_provider
......@@ -422,14 +423,11 @@ def test_context(exe, train_exe, dev_count):
fluid.io.load_persistables(
exe, TrainTaskConfig.ckpt_path, main_program=test_prog)
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.use_experimental_executor = True
build_strategy = fluid.BuildStrategy()
test_exe = fluid.ParallelExecutor(
use_cuda=TrainTaskConfig.use_gpu,
main_program=test_prog,
build_strategy=build_strategy,
exec_strategy=exec_strategy,
share_vars_from=train_exe)
def test(exe=test_exe, pyreader=pyreader):
......@@ -494,13 +492,18 @@ def train_loop(exe,
# For faster executor
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.use_experimental_executor = True
exec_strategy.num_iteration_per_drop_scope = int(args.fetch_steps)
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = True
sum_cost.persistable = True
token_num.persistable = True
# Since the token number differs among devices, customize the gradient scale
# to use the token-averaged cost across devices; the gradient scale is
# `1 / token_number` for the average cost.
# build_strategy.gradient_scale_strategy = fluid.BuildStrategy.GradientScaleStrategy.Customized
build_strategy.fuse_all_optimizer_ops = True
logging.info("begin executor")
train_exe = fluid.ParallelExecutor(
......@@ -632,7 +635,8 @@ def train(args):
place = fluid.CPUPlace()
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
else:
place = fluid.CUDAPlace(0)
gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
place = fluid.CUDAPlace(gpu_id)
dev_count = fluid.core.get_cuda_device_count()
exe = fluid.Executor(place)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册