未验证 提交 6b151c0e 编写于 作者: R Ruibiao Chen 提交者: GitHub

Reduce time cost of BuildOpHappensBefore (#50137)

* Reduce time cost of BuildOpHappensBefore

* Update code

* Update code

* Improve data struct
上级 5b993f2b
...@@ -69,9 +69,15 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build( ...@@ -69,9 +69,15 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build(
instructions_ = &instructions; instructions_ = &instructions;
op_num_ = instructions_->size(); op_num_ = instructions_->size();
ops_before_.assign(op_num_, {});
ops_behind_.assign(op_num_, {});
op_happens_before_.assign(op_num_, std::vector<bool>(op_num_, false));
BuildDownstreamMap(); BuildDownstreamMap();
BuildOpHappensBefore(); VLOG(6) << "Finish BuildDownstreamMap";
ShrinkDownstreamMap(); ShrinkDownstreamMap();
VLOG(6) << "Finish ShrinkDownstreamMap";
if (FLAGS_new_executor_sequential_run) { if (FLAGS_new_executor_sequential_run) {
AddDependencyForSequentialRun(); AddDependencyForSequentialRun();
...@@ -81,18 +87,22 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build( ...@@ -81,18 +87,22 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build(
if (FLAGS_add_dependency_for_communication_op) { if (FLAGS_add_dependency_for_communication_op) {
AddDependencyForCommunicationOp(); AddDependencyForCommunicationOp();
VLOG(6) << "Finish AddDependencyForSequentialRun";
} }
AddDependencyForRandomOp(); AddDependencyForRandomOp();
AddDependencyForReadOp(); VLOG(6) << "Finish AddDependencyForRandomOp";
is_build_ = true; AddDependencyForReadOp();
VLOG(6) << "Finish AddDependencyForReadOp";
VLOG(8) << "Finish build dependency"; VLOG(6) << "Finish build dependency";
VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_); VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_);
VLOG(8) << "downstream_map: " << std::endl VLOG(8) << "downstream_map: " << std::endl
<< StringizeDownstreamMap(op_downstream_map_); << StringizeDownstreamMap(op_downstream_map_);
is_build_ = true;
return op_downstream_map_; return op_downstream_map_;
} }
...@@ -106,15 +116,6 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::OpDownstreamMap() ...@@ -106,15 +116,6 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::OpDownstreamMap()
return op_downstream_map_; return op_downstream_map_;
} }
bool DependencyBuilder::OpHappensBefore(size_t prior_op_idx,
size_t posterior_op_idx) const {
PADDLE_ENFORCE_GE(
op_happens_before_.size(),
0,
phi::errors::Unavailable("op_happen_before is not yet built"));
return op_happens_before_.at(prior_op_idx).at(posterior_op_idx);
}
void DependencyBuilder::AddDependencyForCoalesceTensorOp() { void DependencyBuilder::AddDependencyForCoalesceTensorOp() {
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) { for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) {
if (instructions_->at(op_idx).OpBase()->Type() == kCoalesceTensor) { if (instructions_->at(op_idx).OpBase()->Type() == kCoalesceTensor) {
...@@ -287,7 +288,7 @@ void DependencyBuilder::AddDependencyForReadOp() { ...@@ -287,7 +288,7 @@ void DependencyBuilder::AddDependencyForReadOp() {
for (size_t read_op_idx : read_ops) { for (size_t read_op_idx : read_ops) {
for (size_t downstream_op_idx : startup_ops) { for (size_t downstream_op_idx : startup_ops) {
if (read_op_idx != downstream_op_idx && if (read_op_idx != downstream_op_idx &&
!op_happens_before_[downstream_op_idx][read_op_idx]) { !OpHappensBefore(downstream_op_idx, read_op_idx)) {
AddDownstreamOp(read_op_idx, downstream_op_idx); AddDownstreamOp(read_op_idx, downstream_op_idx);
} }
} }
...@@ -308,42 +309,56 @@ void DependencyBuilder::AddDependencyForSequentialRun() { ...@@ -308,42 +309,56 @@ void DependencyBuilder::AddDependencyForSequentialRun() {
void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx, void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx,
size_t posterior_op_idx) { size_t posterior_op_idx) {
std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx]; PADDLE_ENFORCE_EQ(
OpHappensBefore(posterior_op_idx, prior_op_idx),
false,
phi::errors::Unavailable(
"Can not add dependency %d->%d because %d is run before %d",
prior_op_idx,
posterior_op_idx,
posterior_op_idx,
prior_op_idx));
if (op_happens_before_.size() != 0) { std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx];
PADDLE_ENFORCE_EQ( // NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore
op_happens_before_[posterior_op_idx][prior_op_idx], // ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example,
false, // a->c will not be shrunk in the following case: AddDownstreamOp(a, b) ->
phi::errors::Unavailable( // AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrunk by
"Can not add dependency %d->%d because %d is run before %d", // ShrinkDownstreamMap.
prior_op_idx, for (size_t op_idx : downstream_ops) {
posterior_op_idx, if (OpHappensBefore(op_idx, posterior_op_idx)) {
posterior_op_idx, VLOG(7) << "Find dependencies " << prior_op_idx << "->" << op_idx << "->"
prior_op_idx)); << posterior_op_idx << ", skip adding " << prior_op_idx << "->"
<< posterior_op_idx;
for (size_t op_idx : downstream_ops) { return;
if (op_happens_before_[op_idx][posterior_op_idx]) {
VLOG(7) << "Find dependencies " << prior_op_idx << "->" << op_idx
<< "->" << posterior_op_idx << ", skip adding " << prior_op_idx
<< "->" << posterior_op_idx;
return;
}
} }
} }
downstream_ops.insert(posterior_op_idx); downstream_ops.insert(posterior_op_idx);
if (op_happens_before_.size() != 0) { std::vector<size_t> prior_of_prior = ops_before_[prior_op_idx];
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) { std::vector<size_t> posterior_of_posterior = ops_behind_[posterior_op_idx];
if (op_happens_before_[op_idx][prior_op_idx]) {
op_happens_before_[op_idx][posterior_op_idx] = true;
}
if (op_happens_before_[posterior_op_idx][op_idx]) { auto update_op_happen_before = [this](size_t prior_op_idx,
op_happens_before_[prior_op_idx][op_idx] = true; size_t posterior_op_idx) {
} if (!op_happens_before_[prior_op_idx][posterior_op_idx]) {
op_happens_before_[prior_op_idx][posterior_op_idx] = true;
ops_before_[posterior_op_idx].push_back(prior_op_idx);
ops_behind_[prior_op_idx].push_back(posterior_op_idx);
} }
};
update_op_happen_before(prior_op_idx, posterior_op_idx);
// All ops before prior-op are also before posterior-op
for (size_t op_idx : prior_of_prior) {
update_op_happen_before(op_idx, posterior_op_idx);
}
// All ops after posterior-op are also after prior-op
for (size_t op_idx : posterior_of_posterior) {
update_op_happen_before(prior_op_idx, op_idx);
} }
VLOG(8) << prior_op_idx << "->" << posterior_op_idx; VLOG(8) << prior_op_idx << "->" << posterior_op_idx;
VLOG(8) << "Add dependency from " VLOG(8) << "Add dependency from "
<< instructions_->at(prior_op_idx).OpBase()->Type() << "(" << instructions_->at(prior_op_idx).OpBase()->Type() << "("
...@@ -468,46 +483,6 @@ void DependencyBuilder::BuildDownstreamMap() { ...@@ -468,46 +483,6 @@ void DependencyBuilder::BuildDownstreamMap() {
} }
} }
// Fills op_happens_before_ from op_downstream_map_: after this call,
// op_happens_before_[i][j] == true means op[j] is reachable from op[i]
// through downstream edges, so op[i] should be executed before op[j].
//
// Raises AlreadyExists when an op reachable from op[i] has already been
// recorded as happening before op[i], i.e. the graph contains a circle.
void DependencyBuilder::BuildOpHappensBefore() {
  // happens_before[i][j] means i should be executed before j
  op_happens_before_.assign(op_num_, std::vector<bool>(op_num_, false));

  // bfs to get all next ops
  auto bfs = [&](size_t op_idx) {
    std::queue<size_t> q;
    std::vector<bool> visited(op_num_, false);
    // Mark ops when they are enqueued (not when they are dequeued) so that an
    // op reachable through several paths is expanded only once.
    visited[op_idx] = true;
    q.push(op_idx);
    while (!q.empty()) {
      const size_t op = q.front();
      q.pop();
      // Single lookup instead of count() followed by at().
      const auto iter = op_downstream_map_.find(op);
      if (iter == op_downstream_map_.end()) {
        continue;
      }
      for (size_t next : iter->second) {
        if (visited[next]) {
          continue;
        }
        // If next was already found to run before op_idx, reaching it from
        // op_idx closes a circle.
        PADDLE_ENFORCE_EQ(op_happens_before_[next][op_idx],
                          false,
                          paddle::platform::errors::AlreadyExists(
                              "There exists circle in graph, expected "
                              "%d->%d, but already got %d->%d",
                              op_idx,
                              next,
                              next,
                              op_idx));
        visited[next] = true;
        op_happens_before_[op_idx][next] = true;
        VLOG(10) << "happens before: " << op_idx << " " << next;
        q.push(next);
      }
    }
  };

  for (size_t i = 0; i < op_num_; ++i) {
    bfs(i);
  }
}
void DependencyBuilder::ShrinkDownstreamMap() { void DependencyBuilder::ShrinkDownstreamMap() {
// remove unnecessary downstream ops // remove unnecessary downstream ops
// for example, a->b->c // for example, a->b->c
...@@ -529,7 +504,7 @@ void DependencyBuilder::ShrinkDownstreamMap() { ...@@ -529,7 +504,7 @@ void DependencyBuilder::ShrinkDownstreamMap() {
bool not_after_any = true; bool not_after_any = true;
// find the op that is not executed after any // find the op that is not executed after any
for (size_t other_item : op_downstream_map_.at(i)) { for (size_t other_item : op_downstream_map_.at(i)) {
if (op_happens_before_[other_item][item]) { if (OpHappensBefore(other_item, item)) {
VLOG(8) << "happens_before: " << other_item << "->" << item VLOG(8) << "happens_before: " << other_item << "->" << item
<< ", so skip " << item; << ", so skip " << item;
not_after_any = false; not_after_any = false;
...@@ -541,6 +516,8 @@ void DependencyBuilder::ShrinkDownstreamMap() { ...@@ -541,6 +516,8 @@ void DependencyBuilder::ShrinkDownstreamMap() {
minumum_nexts.insert(item); minumum_nexts.insert(item);
} }
} }
// NOTE(Ruibiao): op_happens_before will not be changed when shrinking the
// downstream map
op_downstream_map_.at(i) = minumum_nexts; op_downstream_map_.at(i) = minumum_nexts;
} }
VLOG(8) << "Finish shrink downstream map"; VLOG(8) << "Finish shrink downstream map";
......
...@@ -40,7 +40,13 @@ class DependencyBuilder { ...@@ -40,7 +40,13 @@ class DependencyBuilder {
const std::map<size_t, std::set<size_t>>& OpDownstreamMap() const; const std::map<size_t, std::set<size_t>>& OpDownstreamMap() const;
bool OpHappensBefore(size_t prior_op_idx, size_t posterior_op_idx) const; bool OpHappensBefore(size_t prior_op_idx, size_t posterior_op_idx) const {
PADDLE_ENFORCE_GE(
op_happens_before_.size(),
0,
phi::errors::Unavailable("op_happen_before is not yet built"));
return op_happens_before_.at(prior_op_idx).at(posterior_op_idx);
}
private: private:
void AddDependencyForCoalesceTensorOp(); void AddDependencyForCoalesceTensorOp();
...@@ -53,21 +59,27 @@ class DependencyBuilder { ...@@ -53,21 +59,27 @@ class DependencyBuilder {
void BuildDownstreamMap(); void BuildDownstreamMap();
void BuildOpHappensBefore();
void ShrinkDownstreamMap(); void ShrinkDownstreamMap();
bool is_build_; bool is_build_;
const std::vector<Instruction>* instructions_; // not_own const std::vector<Instruction>* instructions_; // not_own
size_t op_num_; size_t op_num_;
// op_happens_before_[i][j] == true means op[i] happens before op[j] // ops_behind_ is the adjacency list about op to its posterior-ops, that is to
std::vector<std::vector<bool>> op_happens_before_; // say, ops_behind_[i] == {a, b, c} means op[a], op[b] and op[c] depend on
// op[i] directly or indirectly. ops_before_ is the reversed adjacency list of
// ops_behind_.
std::vector<std::vector<size_t>> ops_before_;
std::vector<std::vector<size_t>> ops_behind_;
// op_downstream_map_ is the mapping from op to its downstream-op set, that is // op_downstream_map_ is the mapping from op to its downstream-op set, that is
// to say, op_downstream_map_[i] == {a, b, c} means op[a], op[b] and op[c] // to say, op_downstream_map_[i] == {a, b, c} means op[a], op[b] and op[c]
// should be dispatched after op[i] // depend on op[i] directly.
std::map<size_t, std::set<size_t>> op_downstream_map_; std::map<size_t, std::set<size_t>> op_downstream_map_;
// op_happens_before_ is a matrix form of ops_before_ and ops_behind_, it is
// used to speed up the query.
std::vector<std::vector<bool>> op_happens_before_;
}; };
} // namespace interpreter } // namespace interpreter
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册