Unverified Commit 3b00dc92 authored by Leo Chen, committed by GitHub

add depend when doing fuse_all_optimizer on program (#41178)

* fix dependency of fused optimizer

* add ut
Parent 4e3c7338
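The change below addresses the ordering problem that appears after the fuse_all_optimizer pass: the fused optimizer reads a coalesced buffer, so once the graph is flattened into a program the optimizer no longer declares the original gradients as inputs, and a dependency-unaware scheduler may run it too early. The following toy is a self-contained sketch (plain Python, not Paddle code; op and variable names such as compute_grad and fused_buffer are made up) of that hazard and of how an inserted "depend" op restores the missing edge.

```python
# Toy model of per-variable read-after-write dependencies between ops.
# All names are hypothetical; this is not Paddle code.

def direct_deps(ops):
    """For each op, collect the closest earlier op that writes one of its
    declared inputs (a crude last-writer analysis)."""
    deps = {op["type"]: set() for op in ops}
    for i, op in enumerate(ops):
        for var in op["inputs"]:
            for prev in reversed(ops[:i]):
                if var in prev["outputs"]:
                    deps[op["type"]].add(prev["type"])
                    break
    return deps

# Without a depend op: the fused optimizer only declares the coalesced buffer
# as input, so nothing forces it to wait for the gradient computation.
ops = [
    {"type": "compute_grad", "inputs": {"x"}, "outputs": {"grad"}},
    {"type": "sgd_on_fused_buffer",
     "inputs": {"fused_buffer"}, "outputs": {"fused_buffer"}},
]
print(direct_deps(ops))
# {'compute_grad': set(), 'sgd_on_fused_buffer': set()}

# With a depend op (X/Out = the optimizer's input, Dep = the original inputs),
# the optimizer is chained behind the gradient producer again.
ops_with_depend = [
    ops[0],
    {"type": "depend", "inputs": {"fused_buffer", "grad"},
     "outputs": {"fused_buffer"}},
    ops[1],
]
print(direct_deps(ops_with_depend))
# {'compute_grad': set(), 'depend': {'compute_grad'},
#  'sgd_on_fused_buffer': {'depend'}}
```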
......@@ -622,6 +622,23 @@ void FuseOptimizerOpPass::InsertInputAndOutputForFusedOpNode(
}
outputs.insert(out_dep_vars.begin(), out_dep_vars.end());
auto nodes_to_string =
[](std::unordered_set<ir::Node *> nodes) -> std::string {
std::stringstream ss;
for (auto n : nodes) {
if (n->IsVar()) {
ss << n->Name() << " ";
}
}
return ss.str();
};
VLOG(4) << "add inputs to " << fused_opt_node->Op()->Type() << ": "
<< nodes_to_string(inputs);
VLOG(4) << "add outputs to " << fused_opt_node->Op()->Type() << ": "
<< nodes_to_string(outputs);
fused_opt_node->inputs.insert(fused_opt_node->inputs.begin(), inputs.begin(),
inputs.end());
fused_opt_node->outputs.insert(fused_opt_node->outputs.begin(),
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph_helper.h"
#include <queue>
#include <stack>
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/op_proto_maker.h"
DECLARE_bool(convert_all_blocks);
......@@ -449,6 +450,19 @@ static OpDesc *ReplaceScaleLossGradOp(const Node &node, OpDesc *desc) {
static void GetGraphOpDesc(const std::vector<Node *> &nodes,
std::vector<OpDesc> *ops) {
auto is_fused_opt = [](Node *n) -> bool {
auto op_type = n->Op()->Type();
auto is_opt =
(op_type == "adam" || op_type == "momentum" || op_type == "sgd");
auto input_names = n->Op()->InputArgumentNames();
auto contains_fused_var = std::any_of(
input_names.begin(), input_names.end(), [](std::string name) {
return name.find(details::kFusedVarNamePrefix) != std::string::npos;
});
VLOG(4) << "is_opt: " << is_opt
<< ", contains_fused_var: " << contains_fused_var;
return is_opt && contains_fused_var;
};
for (Node *n : nodes) {
// if node is not Op, skip
if (!n->IsOp()) continue;
......@@ -459,6 +473,26 @@ static void GetGraphOpDesc(const std::vector<Node *> &nodes,
auto &desc = ops->back();
ReplaceScaleLossGradOp(*n, &desc);
} else if (n->Op()) {
VLOG(4) << "convert op node to desc " << n->Op()->Type();
VLOG(4) << n->ToString();
if (is_fused_opt(n)) {
OpDesc depend_desc(n->Op()->Block());
std::vector<std::string> deps;
for (auto in : n->inputs) {
if (in->IsVar() && !in->IsCtrlVar()) {
deps.push_back(in->Name());
}
}
depend_desc.SetType("depend");
depend_desc.SetInput("X",
n->Op()->Inputs().at(n->Op()->InputNames()[0]));
depend_desc.SetInput("Dep", deps);
depend_desc.SetOutput("Out",
n->Op()->Inputs().at(n->Op()->InputNames()[0]));
ops->emplace_back(depend_desc);
VLOG(4) << "add depend op";
}
ops->emplace_back(*n->Op());
}
// delete no OpDesc op
......
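The hunk above is the core of the fix: GetGraphOpDesc now treats an op as a fused optimizer when its type is adam, momentum, or sgd and one of its input variable names carries details::kFusedVarNamePrefix (presumably provided by the multi_devices_helper.h include added above), and it emits a depend op right in front of it, with X/Out set to the op's first input slot and Dep set to the node's original input variables. A rough Python restatement of that rule, using plain dicts instead of OpDesc, a placeholder prefix value, and all declared inputs as Dep (the C++ walks the graph node's non-control input vars), might look like:

```python
FUSED_VAR_PREFIX = "@FUSED_VAR@"  # placeholder for details::kFusedVarNamePrefix

def is_fused_opt(op):
    # op is a plain dict: {"type": str, "inputs": {slot: [var names]}, ...}
    is_opt = op["type"] in ("adam", "momentum", "sgd")
    contains_fused_var = any(
        FUSED_VAR_PREFIX in name
        for names in op["inputs"].values()
        for name in names)
    return is_opt and contains_fused_var

def insert_depend_ops(ops):
    result = []
    for op in ops:
        if is_fused_opt(op):
            first_slot = next(iter(op["inputs"].values()))
            deps = [n for names in op["inputs"].values() for n in names]
            result.append({"type": "depend",
                           "inputs": {"X": first_slot, "Dep": deps},
                           "outputs": {"Out": first_slot}})
        result.append(op)
    return result
```

Applied to the op list of a converted program, this mirrors the effect of the C++ above: every fused adam/momentum/sgd op ends up with a depend op scheduled immediately before it.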
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
namespace paddle {
namespace framework {
class OpDesc;
class Scope;
template <typename T>
class EmptyGradOpMaker;
} // namespace framework
namespace imperative {
class OpBase;
} // namespace imperative
} // namespace paddle
namespace paddle {
namespace operators {
class DependOp : public framework::OperatorBase {
public:
DependOp(const std::string &type, const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
private:
void RunImpl(const framework::Scope &scope,
const platform::Place &place) const override {
// NOTE(zhiqiu): depend op has empty compute, and it
// can be skipped in the executor.
OP_INOUT_CHECK(HasInputs("X"), "Input", "X", "Depend");
OP_INOUT_CHECK(HasOutputs("Out"), "Output", "Out", "Depend");
auto x_name = Input("X");
auto out_name = Output("Out");
PADDLE_ENFORCE_EQ(x_name, out_name,
platform::errors::PreconditionNotMet(
"Input(X) and Output(Out) varibale should be the "
"same, but got Input is %s and Output is %s.",
x_name, out_name));
return;
}
};
class DependOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "Tensor, the dependence is added for.");
AddInput("Dep", "The tensors that should be generated before X.")
.AsDuplicable();
AddOutput("Out", "Tensor, the same as input X");
AddComment(R"DOC(
Depend Operator, which allows adding an explicit dependency between tensors.
For example, given two ops:
b = opA(a)
y = opB(x)
if tensor b and tensor x have some inner dependency, for example, x shares data with b,
we need to add an explicit dependency for x <- b, otherwise these two operators may
be executed in parallel in static graph. We can use the depend op as below,
b = opA(a)
x = depend(x, b)
y = opB(x)
)DOC");
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(
depend, paddle::operators::DependOp,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
paddle::operators::DependOpProtoMaker);
......@@ -114,6 +114,7 @@ class TestIRPassBase(unittest.TestCase):
# fused all optimizer pass requires this
if paddle.is_compiled_with_cuda():
self.assertTrue(global_block_contains_op(main, "coalesce_tensor"))
self.assertTrue(global_block_contains_op(main, "depend"))
self.assertTrue(
global_block_contains_op(main, "fused_elemwise_add_activation"))
......
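The assertion relies on a helper defined elsewhere in this test file; presumably it amounts to scanning the converted Program's global block for the given op type, roughly:

```python
def global_block_contains_op(program, op_type):
    # assumed behaviour of the helper used in the test above
    return any(op.type == op_type for op in program.global_block().ops)
```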