Commit 75bfdb3a authored by typhoonzero

refine

Parent fc4bcdd7
@@ -279,7 +279,7 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
   return std::unique_ptr<ExecutorPrepareContext>(ctx);
 }
 
-std::vector<std::shared_ptr<ExecutorPrepareContext>> Prepare(
+std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
     const ProgramDesc& program, const std::vector<int>& block_ids) {
   std::vector<std::shared_ptr<ExecutorPrepareContext>> result;
   for (auto& bid : block_ids) {
...
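The hunk above is only a qualifier fix: the multi-block Prepare lacked the Executor:: prefix, so it defined a stray free function instead of the declared member. The loop body past the ... marker is collapsed; as a hedged sketch (this body is an assumption, not the commit's actual code), the overload would delegate to the existing single-block Prepare:

    // Sketch under the assumption that the collapsed loop body simply
    // delegates to the single-block Prepare. The unique_ptr it returns
    // converts implicitly to the shared_ptr the result vector stores,
    // so the contexts can later be shared across worker threads.
    std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
        const ProgramDesc& program, const std::vector<int>& block_ids) {
      std::vector<std::shared_ptr<ExecutorPrepareContext>> result;
      for (auto& bid : block_ids) {
        result.push_back(Prepare(program, bid));
      }
      return result;
    }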
@@ -54,20 +54,24 @@ static void CreateTensorFromMessageType(framework::Variable *var,
   }
 }
 
-static void ParallelExecuteBlocks(const std::vector<size_t> &parallel_blkids,
-                                  framework::Executor *executor,
-                                  framework::ProgramDesc *program,
-                                  framework::Scope *scope) {
+static void ParallelExecuteBlocks(
+    const std::vector<size_t> &parallel_blkids, framework::Executor *executor,
+    const std::vector<std::shared_ptr<framework::ExecutorPrepareContext>>
+        &prepared,
+    framework::ProgramDesc *program, framework::Scope *scope) {
   std::vector<std::future<void>> fs;
   for (size_t idx : parallel_blkids) {
-    fs.push_back(framework::Async([&executor, &program, &scope, idx]() {
-      int run_block = idx;  // thread local
-      try {
-        executor->Run(*program, scope, run_block, false, false);
-      } catch (std::exception &e) {
-        LOG(ERROR) << "run sub program error " << e.what();
-      }
-    }));
+    fs.push_back(
+        framework::Async([&executor, &prepared, &program, &scope, idx]() {
+          int run_block = idx;  // thread local
+          try {
+            // executor->Run(*program, scope, run_block, false, false);
+            executor->RunPreparedContext(prepared[run_block].get(), scope,
+                                         false, false);
+          } catch (std::exception &e) {
+            LOG(ERROR) << "run sub program error " << e.what();
+          }
+        }));
   }
   for (size_t i = 0; i < fs.size(); ++i) fs[i].wait();
 }
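The reason prepared is threaded through ParallelExecuteBlocks: Executor::Run rebuilds the block's operators from the ProgramDesc on every invocation, whereas RunPreparedContext replays a context built once by Prepare, which matters when the server re-runs the same optimize blocks on every cluster batch. A minimal calling-pattern sketch follows; place, scope, and num_steps are illustrative names, not from the diff:

    // Prepare-once / run-many sketch against the Executor API in the diff.
    framework::Executor executor(place);
    std::vector<int> block_list;
    for (size_t blkid = 1; blkid < num_blocks; ++blkid)
      block_list.push_back(blkid);
    auto prepared = executor.Prepare(*program, block_list);  // setup cost paid once
    for (int step = 0; step < num_steps; ++step) {
      for (auto &ctx : prepared) {
        // false, false: reuse the given scope and its existing variables,
        // matching the flags the operator passes above.
        executor.RunPreparedContext(ctx.get(), &scope, false, false);
      }
    }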
@@ -105,15 +109,18 @@ class ListenAndServOp : public framework::OperatorBase {
     auto *block = Attr<framework::BlockDesc *>(kOptimizeBlock);
     auto *program = block->Program();
-    int num_blocks = program->Size();
+    size_t num_blocks = program->Size();
     PADDLE_ENFORCE_GE(num_blocks, 2,
                       "server program should have at least 2 blocks");
 
     framework::Executor executor(dev_place);
     std::vector<int> block_list;
-    for (int blkid = 1; blkid < num_blocks; ++blkid)
+    for (size_t blkid = 1; blkid < num_blocks; ++blkid)
       block_list.push_back(blkid);
     auto prepared = executor.Prepare(*program, block_list);
+    prepared.insert(
+        prepared.begin(),
+        std::shared_ptr<framework::ExecutorPrepareContext>(nullptr));
 
     // TODO(typhoonzero): change this to a while_op for every cluster-batch.
     bool exit_flag = false;
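One subtlety in the inserted null entry: Prepare was called with block_list = {1, ..., num_blocks - 1}, so prepared[0] initially holds block 1's context, while the lambda in ParallelExecuteBlocks indexes prepared[run_block] by raw block id. Padding the front with a null shared_ptr shifts each context into the slot matching its block id; block 0 is the root block and is never dispatched through this path, so its slot can stay null. A hypothetical illustration for a program with four blocks:

    // Hypothetical: program has blocks 0..3, so block_list = {1, 2, 3}.
    auto prepared = executor.Prepare(*program, {1, 2, 3});
    // prepared[0] -> block 1, prepared[1] -> block 2, prepared[2] -> block 3
    prepared.insert(prepared.begin(),
                    std::shared_ptr<framework::ExecutorPrepareContext>(nullptr));
    // prepared[0] == nullptr  (block 0 never runs through this path)
    // prepared[1] -> block 1, prepared[2] -> block 2, prepared[3] -> block 3
    // prepared[run_block].get() now selects the context for block run_block.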
@@ -161,21 +168,22 @@ class ListenAndServOp : public framework::OperatorBase {
       // The optimize blocks which have the same parent ID would run parallel
       // TODO(Yancey1989): need to use ParallelExecutor for future
-      size_t last_parent_blkid = program->Block(1).Parent();
+      int32_t last_parent_blkid = program->Block(1).Parent();
       std::vector<size_t> parallel_blkids;
       parallel_blkids.push_back(1);
       double ts = detail::GetTimestamp();
       for (size_t blkid = 2; blkid < num_blocks; ++blkid) {
         if (program->Block(blkid).Parent() != last_parent_blkid) {
           for (size_t idx : parallel_blkids) VLOG(3) << idx;
-          ParallelExecuteBlocks(parallel_blkids, &executor, program,
-                                &recv_scope);
+          ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program,
+                                &recv_scope);
           parallel_blkids.clear();
           last_parent_blkid = program->Block(blkid).Parent();
         }
         parallel_blkids.push_back(blkid);
       }
-      ParallelExecuteBlocks(parallel_blkids, &executor, program, &recv_scope);
+      ParallelExecuteBlocks(parallel_blkids, &executor, prepared, program,
+                            &recv_scope);
       VLOG(2) << "run all blocks spent (ms) " << detail::GetTimestamp() - ts;
...
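For intuition on the dispatch loop above: consecutive optimize blocks that share a parent block id are batched into one group, each group runs in parallel, and the groups run one after another. A self-contained walk-through of just the grouping (the parent ids are invented for illustration):

    // Standalone illustration of the grouping logic; parent ids are made up.
    // For parents {-, 0, 0, 1, 0} on blocks 0..4 it prints the groups
    // "1 2", "3", "4", mirroring the batches the operator would launch.
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> parent = {-1, 0, 0, 1, 0};  // block id -> parent block id
      int last_parent = parent[1];
      std::vector<size_t> group = {1};
      for (size_t blkid = 2; blkid < parent.size(); ++blkid) {
        if (parent[blkid] != last_parent) {
          for (size_t idx : group) std::printf("%zu ", idx);  // launch group
          std::printf("\n");
          group.clear();
          last_parent = parent[blkid];
        }
        group.push_back(blkid);
      }
      for (size_t idx : group) std::printf("%zu ", idx);  // final group
      std::printf("\n");
      return 0;
    }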