未验证 提交 6b151c0e 编写于 作者: R Ruibiao Chen 提交者: GitHub

Reduce time cost of BuildOpHappensBefore (#50137)

* Reduce time cost of BuildOpHappensBefore

* Update code

* Update code

* Improve data struct
上级 5b993f2b
......@@ -69,9 +69,15 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build(
instructions_ = &instructions;
op_num_ = instructions_->size();
ops_before_.assign(op_num_, {});
ops_behind_.assign(op_num_, {});
op_happens_before_.assign(op_num_, std::vector<bool>(op_num_, false));
BuildDownstreamMap();
BuildOpHappensBefore();
VLOG(6) << "Finish BuildDownstreamMap";
ShrinkDownstreamMap();
VLOG(6) << "Finish ShrinkDownstreamMap";
if (FLAGS_new_executor_sequential_run) {
AddDependencyForSequentialRun();
......@@ -81,18 +87,22 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build(
if (FLAGS_add_dependency_for_communication_op) {
AddDependencyForCommunicationOp();
VLOG(6) << "Finish AddDependencyForSequentialRun";
}
AddDependencyForRandomOp();
AddDependencyForReadOp();
VLOG(6) << "Finish AddDependencyForRandomOp";
is_build_ = true;
AddDependencyForReadOp();
VLOG(6) << "Finish AddDependencyForReadOp";
VLOG(8) << "Finish build dependency";
VLOG(6) << "Finish build dependency";
VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_);
VLOG(8) << "downstream_map: " << std::endl
<< StringizeDownstreamMap(op_downstream_map_);
is_build_ = true;
return op_downstream_map_;
}
......@@ -106,15 +116,6 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::OpDownstreamMap()
return op_downstream_map_;
}
bool DependencyBuilder::OpHappensBefore(size_t prior_op_idx,
size_t posterior_op_idx) const {
PADDLE_ENFORCE_GE(
op_happens_before_.size(),
0,
phi::errors::Unavailable("op_happen_before is not yet built"));
return op_happens_before_.at(prior_op_idx).at(posterior_op_idx);
}
void DependencyBuilder::AddDependencyForCoalesceTensorOp() {
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) {
if (instructions_->at(op_idx).OpBase()->Type() == kCoalesceTensor) {
......@@ -287,7 +288,7 @@ void DependencyBuilder::AddDependencyForReadOp() {
for (size_t read_op_idx : read_ops) {
for (size_t downstream_op_idx : startup_ops) {
if (read_op_idx != downstream_op_idx &&
!op_happens_before_[downstream_op_idx][read_op_idx]) {
!OpHappensBefore(downstream_op_idx, read_op_idx)) {
AddDownstreamOp(read_op_idx, downstream_op_idx);
}
}
......@@ -308,42 +309,56 @@ void DependencyBuilder::AddDependencyForSequentialRun() {
void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx,
size_t posterior_op_idx) {
std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx];
PADDLE_ENFORCE_EQ(
OpHappensBefore(posterior_op_idx, prior_op_idx),
false,
phi::errors::Unavailable(
"Can not add dependency %d->%d because %d is run before %d",
prior_op_idx,
posterior_op_idx,
posterior_op_idx,
prior_op_idx));
if (op_happens_before_.size() != 0) {
PADDLE_ENFORCE_EQ(
op_happens_before_[posterior_op_idx][prior_op_idx],
false,
phi::errors::Unavailable(
"Can not add dependency %d->%d because %d is run before %d",
prior_op_idx,
posterior_op_idx,
posterior_op_idx,
prior_op_idx));
for (size_t op_idx : downstream_ops) {
if (op_happens_before_[op_idx][posterior_op_idx]) {
VLOG(7) << "Find dependencies " << prior_op_idx << "->" << op_idx
<< "->" << posterior_op_idx << ", skip adding " << prior_op_idx
<< "->" << posterior_op_idx;
return;
}
std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx];
// NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore
// ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example,
// a->c will not be shrinked in the following case: AddDownstreamOp(a, b) ->
// AddDownstreamOp(a, c) -> AddDownstreamOp(b, c), it should be shrinked by
// ShrinkDownstreamMap.
for (size_t op_idx : downstream_ops) {
if (OpHappensBefore(op_idx, posterior_op_idx)) {
VLOG(7) << "Find dependencies " << prior_op_idx << "->" << op_idx << "->"
<< posterior_op_idx << ", skip adding " << prior_op_idx << "->"
<< posterior_op_idx;
return;
}
}
downstream_ops.insert(posterior_op_idx);
if (op_happens_before_.size() != 0) {
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) {
if (op_happens_before_[op_idx][prior_op_idx]) {
op_happens_before_[op_idx][posterior_op_idx] = true;
}
std::vector<size_t> prior_of_prior = ops_before_[prior_op_idx];
std::vector<size_t> posterior_of_posterior = ops_behind_[posterior_op_idx];
if (op_happens_before_[posterior_op_idx][op_idx]) {
op_happens_before_[prior_op_idx][op_idx] = true;
}
auto update_op_happen_before = [this](size_t prior_op_idx,
size_t posterior_op_idx) {
if (!op_happens_before_[prior_op_idx][posterior_op_idx]) {
op_happens_before_[prior_op_idx][posterior_op_idx] = true;
ops_before_[posterior_op_idx].push_back(prior_op_idx);
ops_behind_[prior_op_idx].push_back(posterior_op_idx);
}
};
update_op_happen_before(prior_op_idx, posterior_op_idx);
// All ops before prior-op are also before posterior-op
for (size_t op_idx : prior_of_prior) {
update_op_happen_before(op_idx, posterior_op_idx);
}
// All ops after posterior-op are also after prior-op
for (size_t op_idx : posterior_of_posterior) {
update_op_happen_before(prior_op_idx, op_idx);
}
VLOG(8) << prior_op_idx << "->" << posterior_op_idx;
VLOG(8) << "Add dependency from "
<< instructions_->at(prior_op_idx).OpBase()->Type() << "("
......@@ -468,46 +483,6 @@ void DependencyBuilder::BuildDownstreamMap() {
}
}
void DependencyBuilder::BuildOpHappensBefore() {
// happens_before[i][j] means i should be executed before j
op_happens_before_.assign(op_num_, std::vector<bool>(op_num_, false));
// bfs to get all next ops
auto bfs = [&](size_t op_idx) {
std::queue<size_t> q;
std::vector<bool> visited(op_num_, false);
q.push(op_idx);
while (!q.empty()) {
size_t op = q.front();
q.pop();
visited[op] = true;
if (!op_downstream_map_.count(op)) {
continue;
}
for (auto next : op_downstream_map_.at(op)) {
if (!visited[next]) {
PADDLE_ENFORCE_EQ(op_happens_before_[next][op_idx],
false,
paddle::platform::errors::AlreadyExists(
"There exists circle in graph, expected "
"%d->%d, but already got %d->%d",
op_idx,
next,
next,
op_idx));
op_happens_before_[op_idx][next] = true;
VLOG(10) << "happens before: " << op_idx << " " << next;
q.push(next);
}
}
}
};
for (size_t i = 0; i < op_num_; ++i) {
bfs(i);
}
}
void DependencyBuilder::ShrinkDownstreamMap() {
// remove unnecessary downstream ops
// for example, a->b->c
......@@ -529,7 +504,7 @@ void DependencyBuilder::ShrinkDownstreamMap() {
bool not_after_any = true;
// find the op that is not executed after any
for (size_t other_item : op_downstream_map_.at(i)) {
if (op_happens_before_[other_item][item]) {
if (OpHappensBefore(other_item, item)) {
VLOG(8) << "happens_before: " << other_item << "->" << item
<< ", so skip " << item;
not_after_any = false;
......@@ -541,6 +516,8 @@ void DependencyBuilder::ShrinkDownstreamMap() {
minumum_nexts.insert(item);
}
}
// NOTE(Ruibiao): op_happens_before will not be changed when shrink
// dowstream map
op_downstream_map_.at(i) = minumum_nexts;
}
VLOG(8) << "Finish shrink downstream map";
......
......@@ -40,7 +40,13 @@ class DependencyBuilder {
const std::map<size_t, std::set<size_t>>& OpDownstreamMap() const;
bool OpHappensBefore(size_t prior_op_idx, size_t posterior_op_idx) const;
bool OpHappensBefore(size_t prior_op_idx, size_t posterior_op_idx) const {
PADDLE_ENFORCE_GE(
op_happens_before_.size(),
0,
phi::errors::Unavailable("op_happen_before is not yet built"));
return op_happens_before_.at(prior_op_idx).at(posterior_op_idx);
}
private:
void AddDependencyForCoalesceTensorOp();
......@@ -53,21 +59,27 @@ class DependencyBuilder {
void BuildDownstreamMap();
void BuildOpHappensBefore();
void ShrinkDownstreamMap();
bool is_build_;
const std::vector<Instruction>* instructions_; // not_own
size_t op_num_;
// op_happens_before_[i][j] == true means op[i] happens before op[j]
std::vector<std::vector<bool>> op_happens_before_;
// ops_behind_ is the adjacency list about op to its posterior-ops, that is to
// say, op_behind_[i] == {a, b, c} means op[a], op[b] and op[c] depend on
// op[i] directly or indirectly. ops_before_ is the revered adjacency list of
// ops_behind_.
std::vector<std::vector<size_t>> ops_before_;
std::vector<std::vector<size_t>> ops_behind_;
// op_downstream_map_ is the mapping from op to its downstream-op set, that is
// to say, op_downstream_map_[i] == {a, b, c} means op[a], op[b] and op[c]
// should be dispatched after op[i]
// depend on op[i] directly.
std::map<size_t, std::set<size_t>> op_downstream_map_;
// op_happens_before_ is a matrix form of ops_before_ and ops_behind_, it is
// used to speed up the query.
std::vector<std::vector<bool>> op_happens_before_;
};
} // namespace interpreter
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册