提交 1f063d09 作者: Yu Yang

Memorder: use explicit std::memory_order on the pending_vars atomics and remove debug VLOG(1) output

上级 b1cb8bbd
...@@ -643,14 +643,16 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors, ...@@ -643,14 +643,16 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
for (auto &place_pair : member_->vars_) { for (auto &place_pair : member_->vars_) {
for (auto &name_pair : place_pair.second) { for (auto &name_pair : place_pair.second) {
for (auto &version_pair : name_pair.second) { for (auto &version_pair : name_pair.second) {
pending_vars[&version_pair.second] = pending_vars[&version_pair.second].store(
version_pair.second.generated_op_ == nullptr; version_pair.second.generated_op_ == nullptr,
std::memory_order_relaxed);
} }
} }
} }
for (auto &var : member_->dep_vars_) { for (auto &var : member_->dep_vars_) {
pending_vars[var.get()] = var->generated_op_ == nullptr; pending_vars[var.get()].store(var->generated_op_ == nullptr,
std::memory_order_relaxed);
} }
std::vector<OpHandle *> to_run; std::vector<OpHandle *> to_run;
...@@ -700,14 +702,12 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors, ...@@ -700,14 +702,12 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
} }
while (!pending_ops.empty()) { while (!pending_ops.empty()) {
VLOG(1) << "1";
VarHandleBase *ready_var = nullptr; VarHandleBase *ready_var = nullptr;
for (auto &pair : pending_vars) { for (auto &pair : pending_vars) {
if (pair.second.load(std::memory_order_acquire)) { if (pair.second.load(std::memory_order_acquire)) {
ready_var = pair.first; ready_var = pair.first;
} }
} }
VLOG(1) << "1";
if (ready_var == nullptr) { if (ready_var == nullptr) {
// FIXME use conditional var instead of busy wait. // FIXME use conditional var instead of busy wait.
...@@ -717,11 +717,8 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors, ...@@ -717,11 +717,8 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
continue; continue;
} }
VLOG(1) << "1";
pending_vars.erase(ready_var); pending_vars.erase(ready_var);
VLOG(1) << "1";
to_run.clear(); to_run.clear();
VLOG(1) << "1";
for (auto *op : ready_var->pending_ops_) { for (auto *op : ready_var->pending_ops_) {
auto &deps = pending_ops[op]; auto &deps = pending_ops[op];
--deps; --deps;
...@@ -729,16 +726,12 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors, ...@@ -729,16 +726,12 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
to_run.emplace_back(op); to_run.emplace_back(op);
} }
} }
VLOG(1) << "1";
for (auto *op : to_run) { for (auto *op : to_run) {
pending_ops.erase(op); pending_ops.erase(op);
RunOp(pending_vars, op); RunOp(pending_vars, op);
} }
VLOG(1) << "1";
} }
VLOG(1) << "1";
fetch_ops.clear(); fetch_ops.clear();
VLOG(1) << "1";
*member_->global_scope_->Var(fetched_var_name)->GetMutable<LoDTensorArray>() = *member_->global_scope_->Var(fetched_var_name)->GetMutable<LoDTensorArray>() =
fetched_data->tensors_; fetched_data->tensors_;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册