Perhaps the main_program is not set to ParallelExecutor.
Created by: Dabulv
I0427 14:37:27.545102 21654 parallel_executor.cc:440] The Program will be executed on CUDA using ParallelExecutor, 1 cards are used, so 1 programs are executed in parallel.
I0427 14:37:27.635213 21654 build_strategy.cc:365] SeqOnlyAllReduceOps:0, num_trainers:1
I0427 14:37:27.762869 21654 parallel_executor.cc:307] Inplace strategy is enabled, when build_strategy.enable_inplace = True
I0427 14:37:27.837890 21654 parallel_executor.cc:375] Garbage collection strategy is enabled, when FLAGS_eager_delete_tensor_gb = 0
./python_paddle/lib/python3.6/site-packages/paddle/fluid/executor.py:782: UserWarning: The following exception is not an EOF exception.
  "The following exception is not an EOF exception.")
Traceback (most recent call last):
  File "./python_paddle/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "./python_paddle/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "test.py", line 140, in <module>
    trainer.train(print_steps=print_steps)
  File "./python_paddle/lib/python3.6/site-packages/paddlepalm/multihead_trainer.py", line 226, in train
    rt_outputs, task_id = self.train_one_step(feed)
  File "./python_paddle/lib/python3.6/site-packages/paddlepalm/multihead_trainer.py", line 282, in train_one_step
    rt_outputs = self._trainers[task_id].train_one_step(batch)
  File "./python_paddle/lib/python3.6/site-packages/paddlepalm/trainer.py", line 742, in train_one_step
    rt_outputs = exe.run(distribute_train_prog, feed=feed, fetch_list=fetch_list)
  File "./python_paddle/lib/python3.6/site-packages/paddle/fluid/executor.py", line 783, in run
    six.reraise(*sys.exc_info())
  File "./python_paddle/lib/python3.6/site-packages/six.py", line 693, in reraise
    raise value
  File "./python_paddle/lib/python3.6/site-packages/paddle/fluid/executor.py", line 778, in run
    use_program_cache=use_program_cache)
  File "./python_paddle/lib/python3.6/site-packages/paddle/fluid/executor.py", line 843, in _run_impl
    return_numpy=return_numpy)
  File "./python_paddle/lib/python3.6/site-packages/paddle/fluid/executor.py", line 677, in _run_parallel
    tensors = exe.run(fetch_var_names)._move_to_list()
paddle.fluid.core_avx.EnforceNotMet:
C++ Call Stacks (More useful to developers):
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int)
1 paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const*, int)
2 paddle::framework::details::FastThreadedSSAGraphExecutor::InsertFetchOps(std::vector<std::string, std::allocator<std::string> > const&, std::vector<paddle::framework::LoDTensor, std::allocator<paddle::framework::LoDTensor> >*, std::unordered_map<std::string, std::vector<paddle::framework::details::VarHandleBase*, std::allocator<paddle::framework::details::VarHandleBase*> >, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, std::vector<paddle::framework::details::VarHandleBase*, std::allocator<paddle::framework::details::VarHandleBase*> > > > >*, std::unordered_map<paddle::framework::details::OpHandleBase*, std::atomic<int>, std::hash<paddle::framework::details::OpHandleBase*>, std::equal_to<paddle::framework::details::OpHandleBase*>, std::allocator<std::pair<paddle::framework::details::OpHandleBase* const, std::atomic<int> > > >*, std::vector<paddle::framework::details::OpHandleBase*, std::allocator<paddle::framework::details::OpHandleBase*> >*, std::vector<paddle::framework::details::OpHandleBase*, std::allocator<paddle::framework::details::OpHandleBase*> >*)
3 paddle::framework::details::FastThreadedSSAGraphExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&)
4 paddle::framework::details::ScopeBufferedMonitor::Apply(std::function<void ()> const&, bool)
5 paddle::framework::details::ScopeBufferedSSAGraphExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&)
6 paddle::framework::ParallelExecutor::Run(std::vector<std::string, std::allocator<std::string> > const&)
Error Message Summary:
PreconditionNotMetError: Cannot find fetched variable(dvqa.tmp_1). Perhaps the main_program is not set to ParallelExecutor. [Hint: Expected fetched_var_it != fetched_vars->end(), but received fetched_var_it == fetched_vars->end().] at (/paddle/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc:147)