// Patch hunk for paddle/fluid/operators/listen_and_serv_op.cc; the hunk header
// places it inside class ListenAndServOp. The '+' lines add VLOG(2) tracing:
//   * around each executor.Run() call dispatched through framework::Async for
//     block ids 0 .. num_blocks - 2, and around the wait on the collected fs
//     entries;
//   * after the run of block num_blocks - 1 (only when num_blocks > 2), logging
//     detail::GetTimestamp() minus the value stored in ts before the loop;
//   * two loops that each log every name from recv_scope.LocalVarNames().
// NOTE(review): the lambda handed to framework::Async captures with [&], so the
// added "begin run in thread" log and the executor.Run() call read the loop
// variable blkid by reference while the enclosing for loop keeps incrementing
// it -- presumably each task should capture blkid by value; confirm.
// NOTE(review): both added dump loops iterate recv_scope.LocalVarNames(); the
// second is labelled "vars in parent scope" but no other scope is queried --
// looks like a copy-paste slip; confirm which scope was intended.
// NOTE(review): "begin run in thread" is streamed directly before blkid with no
// separating space in the literal.
// NOTE(review): "std::vector> fs" appears to have lost its template arguments
// in this copy of the patch; verify against the real file before applying.
diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index 6fb17470a3b08ebfc4ac8cb0022cfba037747578..8cae29d74dff5ad1506197100efe88240d572794 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -139,27 +139,39 @@ class ListenAndServOp : public framework::OperatorBase { // should be global ops. // NOTE: if is_gpu_place, CUDA kernels are laugched by multiple threads // and this will still work. + double ts = detail::GetTimestamp(); std::vector> fs; for (int blkid = 0; blkid < num_blocks - 1; ++blkid) { fs.push_back(framework::Async([&]() { try { + VLOG(2) << "begin run in thread" << blkid; executor.Run(*program, &recv_scope, blkid, false /*create_local_scope*/, false /*create_vars*/); + VLOG(2) << "end run in thread"; } catch (std::exception &e) { LOG(ERROR) << "run sub program error " << e.what(); } })); } + VLOG(2) << "waiting opts..."; for (int blkid = 0; blkid < num_blocks - 1; ++blkid) fs[blkid].wait(); + VLOG(2) << "waiting opts...OK"; // Run global block at final step if (num_blocks > 2) { try { executor.Run(*program, &recv_scope, num_blocks - 1, false /*create_local_scope*/, false /*create_vars*/); + VLOG(2) << "run global OK , spent " << detail::GetTimestamp() - ts; } catch (std::exception &e) { LOG(ERROR) << "run sub program error " << e.what(); } } + for (auto &n : recv_scope.LocalVarNames()) { + VLOG(2) << "vars in scope: " << n; + } + for (auto &n : recv_scope.LocalVarNames()) { + VLOG(2) << "vars in parent scope: " << n; + } // Reset the received sparse variables, the sum operator would not // sum the input sparse variables which rows is empty at the next