diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator.cc b/paddle/fluid/framework/ir/fusion_group/code_generator.cc
index b2a6aec0ad282d50c504f79067e4ceb4372d5a2a..c41a2ed83591459289647ddad59db1636aabd3f7 100644
--- a/paddle/fluid/framework/ir/fusion_group/code_generator.cc
+++ b/paddle/fluid/framework/ir/fusion_group/code_generator.cc
@@ -36,6 +36,16 @@ std::string CodeGenerator::Generate(SubGraph* subgraph) {
   return Generate(subgraph->func_name, expressions);
 }
 
+static bool HasInput(Node* n, std::string name) {
+  PADDLE_ENFORCE_EQ(n && n->IsOp() && n->Op(), true,
+                    platform::errors::InvalidArgument(
+                        "Expected node %p to be an operator node.", n));
+  std::vector<std::string> input_names = n->Op()->InputNames();
+  std::unordered_set<std::string> input_names_set(input_names.begin(),
+                                                  input_names.end());
+  return input_names_set.find(name) != input_names_set.end();
+}
+
 std::vector<OperationExpression> CodeGenerator::ConvertToExpressions(
     SubGraph* subgraph) {
   std::unordered_map<std::string, int> var_ids = EncodeVarNodes(subgraph);
@@ -45,19 +55,20 @@ std::vector<OperationExpression> CodeGenerator::ConvertToExpressions(
       auto* op = node->Op();
 
       // Input ids should be set in fixed order, like:
-      //  - x, y in forward operations
-      //  - x, y, out, out@GRAD in backward operations
+      //  - X, Y in forward operations
+      //  - X, Y, Out, out@GRAD in backward operations
       std::vector<int> input_ids;
       std::vector<std::string> input_names =
          OperationMap::Instance().Get(op->Type()).input_names;
      for (auto& name : input_names) {
-        // TODO(liuyiqun): support duplicated input.
-        if (op->Input(name).size() >= 1U) {
-          // Some input vars are not used in grad ops, such as
-          // "elementwise_add_grad", where "X", "Y" and "Out" are not used.
-          PADDLE_ENFORCE_NE(var_ids.find(op->Input(name)[0]), var_ids.end(),
-                            "Input(%s) of operation %s should be set.", name,
-                            op->Type());
+        // Some input vars are not used in grad ops, such as
+        // "elementwise_add_grad", where "X", "Y" and "Out" are not used.
+        if (HasInput(node, name) && op->Input(name).size() >= 1U) {
+          // TODO(liuyiqun): support duplicated input.
+          PADDLE_ENFORCE_NE(
+              var_ids.find(op->Input(name)[0]), var_ids.end(),
+              platform::errors::InvalidArgument(
+                  "Input(%s) of operation %s is not set.", name, op->Type()));
           input_ids.push_back(var_ids[op->Input(name)[0]]);
         } else {
           input_ids.push_back(-1);
@@ -69,12 +80,14 @@ std::vector<OperationExpression> CodeGenerator::ConvertToExpressions(
       std::vector<std::string> output_names =
           OperationMap::Instance().Get(op->Type()).output_names;
       for (auto& name : output_names) {
-        PADDLE_ENFORCE_EQ(op->Output(name).size(), 1U,
-                          "Output(%s) of operation %s should be set.", name,
-                          op->Type());
-        PADDLE_ENFORCE_NE(var_ids.find(op->Output(name)[0]), var_ids.end(),
-                          "Output(%s) of operation %s should be set.", name,
-                          op->Type());
+        PADDLE_ENFORCE_EQ(
+            op->Output(name).size(), 1U,
+            platform::errors::InvalidArgument(
+                "Output(%s) of operation %s is not set.", name, op->Type()));
+        PADDLE_ENFORCE_NE(
+            var_ids.find(op->Output(name)[0]), var_ids.end(),
+            platform::errors::InvalidArgument(
+                "Output(%s) of operation %s is not set.", name, op->Type()));
         output_ids.push_back(var_ids[op->Output(name)[0]]);
       }
       expressions.push_back(
@@ -218,8 +231,9 @@ std::unordered_map<std::string, int> CodeGenerator::EncodeVarNodes(
       }
       PADDLE_ENFORCE_EQ(
           is_found, true,
-          "Subgraph with internal var nodes (%s) is not supported yet.",
-          node->Name());
+          platform::errors::Unimplemented(
+              "Subgraph with internal var nodes (%s) is not supported yet.",
+              node->Name()));
     }
   }
   // Encoding output vars.
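[Review note] The new HasInput() guard matters because grad ops such as elementwise_add_grad may not declare "X", "Y" or "Out" as inputs at all, so calling op->Input(name) for those slots would fail before the size check ever runs. A minimal sketch of the resulting lookup pattern (illustrative only, not Paddle code; all names here are invented):

    #include <string>
    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    // Every expected input slot gets an id; slots the op does not declare are
    // filled with -1 so later code generation can skip them. (The real code
    // additionally enforces that declared inputs are present in var_ids.)
    std::vector<int> CollectInputIds(
        const std::vector<std::string>& expected_names,
        const std::unordered_set<std::string>& declared_names,
        const std::unordered_map<std::string, int>& var_ids) {
      std::vector<int> input_ids;
      for (const auto& name : expected_names) {
        auto iter =
            declared_names.count(name) ? var_ids.find(name) : var_ids.end();
        input_ids.push_back(iter == var_ids.end() ? -1 : iter->second);
      }
      return input_ids;
    }

    int main() {
      std::unordered_map<std::string, int> var_ids = {{"X", 0}, {"Out@GRAD", 3}};
      std::unordered_set<std::string> declared = {"X", "Out@GRAD"};
      // Expected order X, Y, Out, Out@GRAD -> ids {0, -1, -1, 3}
      std::vector<int> ids =
          CollectInputIds({"X", "Y", "Out", "Out@GRAD"}, declared, var_ids);
      return ids[1] == -1 ? 0 : 1;
    }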
diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator_helper.cc b/paddle/fluid/framework/ir/fusion_group/code_generator_helper.cc
index 38d5ebde87e3208ae3111dd591db9d5a76de4d84..be06a620f78ae31e6fd457f52e7141b50c1570d3 100644
--- a/paddle/fluid/framework/ir/fusion_group/code_generator_helper.cc
+++ b/paddle/fluid/framework/ir/fusion_group/code_generator_helper.cc
@@ -45,11 +45,16 @@ std::string OperationExpression::GetRHS(std::unordered_set<int>* used,
     }
     std::string index_str = rhs.substr(pos + 2, length);
     int index = StringTo<int>(index_str);
-    PADDLE_ENFORCE_LT(index, input_ids_.size(),
-                      "Only %d inputs are provided, but need %d.",
-                      input_ids_.size(), index + 1);
-    PADDLE_ENFORCE_GE(input_ids_[index], 0,
-                      "Input id should be no less than 0.");
+    PADDLE_ENFORCE_LT(
+        index, input_ids_.size(),
+        platform::errors::InvalidArgument(
+            "Only %d inputs are provided, but need %d for operation < %s >.",
+            input_ids_.size(), index + 1, op_type_));
+    PADDLE_ENFORCE_GE(
+        input_ids_[index], 0,
+        platform::errors::InvalidArgument(
+            "Expected %d-th input id > 0 for operation < %s >. Received %d.",
+            index, op_type_, input_ids_[index]));
     rhs.replace(pos, length + 3, TmpName(input_ids_[index]));
     used->insert(input_ids_[index]);
   }
diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator_helper.h b/paddle/fluid/framework/ir/fusion_group/code_generator_helper.h
index ed4ce665d2b6ba43d17538c60e3517064e7430e4..140e0d3a06bb302f305409f2d2031a605a289c8f 100644
--- a/paddle/fluid/framework/ir/fusion_group/code_generator_helper.h
+++ b/paddle/fluid/framework/ir/fusion_group/code_generator_helper.h
@@ -113,7 +113,8 @@ class CodeTemplate {
 
     for (auto iter : template_var.Get()) {
       PADDLE_ENFORCE_NE(found.find(iter.first), found.end(),
-                        "Keyword %s in template is not set.", iter.first);
+                        platform::errors::PreconditionNotMet(
+                            "Keyword %s in template is not set.", iter.first));
     }
 
     return EmitIndents(ret);
diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc b/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc
index bc9f2408b67cd1b1d47470795a5cc82e025c46c5..9515237f964409adc083f83a03acde85983b32a4 100644
--- a/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc
+++ b/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc
@@ -35,7 +35,7 @@ namespace fusion_group {
 inline float relu(float x) { return x > 0 ? x : 0.; }
 
 inline float relu_grad_dx(float x, float out, float dout) {
-  return x > 0 ? dout : 0;
+  return out > 0 ? dout : 0;
 }
 
 // sigmoid
@@ -117,7 +117,7 @@ void CheckOutput(const std::vector<OperationExpression>& expressions,
           elementwise_mul(var[input_ids[0]], var[input_ids[1]]);
     } else if (op_type == "relu_grad") {
       var[output_ids[0]] =
-          relu_grad_dx(var[input_ids[0]], 0, var[input_ids[2]]);
+          relu_grad_dx(0, var[input_ids[1]], var[input_ids[2]]);
     } else if (op_type == "sigmoid_grad") {
       var[output_ids[0]] =
           sigmoid_grad_dx(0, var[input_ids[1]], var[input_ids[2]]);
@@ -138,8 +138,7 @@ void CheckOutput(const std::vector<OperationExpression>& expressions,
   for (auto id : output_ids_of_subgraph) {
     float actual = cpu_tensors[id].data<float>()[i];
     float expect = var[id];
-    PADDLE_ENFORCE_LT(fabs(actual - expect), 1.E-05,
-                      "Get %f vs %f (actual vs expect).", actual, expect);
+    EXPECT_LT(fabs(actual - expect), 1.E-05);
   }
 }
 
@@ -150,8 +149,7 @@ void SetupRandomCPUTensor(LoDTensor* tensor) {
   std::uniform_real_distribution<double> uniform_dist(0, 1);
 
   T* ptr = tensor->data<T>();
-  PADDLE_ENFORCE_NOT_NULL(
-      ptr, "Call mutable_data to alloc memory for Tensor first.");
+  EXPECT_NE(ptr, nullptr);
   for (int64_t i = 0; i < tensor->numel(); ++i) {
     ptr[i] = static_cast<T>(uniform_dist(rng)) - static_cast<T>(0.5);
   }
@@ -283,7 +281,7 @@ TEST(code_generator, elementwise_grad) {
   // t3 = relu(t2)
   // t2' = relu_grad(t2, t3, t3')
   // t0', t1' = elementwise_mul_grad(t0, t1, t2, t2')
-  fusion_group::OperationExpression exp1("relu_grad", {2, -1, 7}, {6});
+  fusion_group::OperationExpression exp1("relu_grad", {-1, 3, 7}, {6});
   fusion_group::OperationExpression exp2("elementwise_mul_grad", {0, 1, 2, 6},
                                          {4, 5});
   std::vector<fusion_group::OperationExpression> expressions = {exp1, exp2};
@@ -300,7 +298,7 @@ TEST(code_generator, elementwise_grad) {
   // Op(relu_grad), inputs:{2,3,7}, outputs:{6}
   // Op(elementwise_mul_grad), inputs:{0,1,2,6}, outputs:{4,5}
   int n = cpu_tensors[0].numel();
-  std::vector<int> input_ids = {0, 1, 2, -1, 7};
+  std::vector<int> input_ids = {0, 1, 2, 3, 7};
   std::vector<int> output_ids = {4, 5, 6};
   TestMain("elementwise_grad_kernel_0", expressions, cpu_tensors, n, input_ids,
            output_ids);
@@ -332,22 +330,25 @@ std::unique_ptr<Graph> BuildGraph(
   //                     tmp_2@GRAD(13), x2@GRAD(14), x0@GRAD(15),
   //                     x3@GRAD(16), x1@GRAD(17)
   paddle::framework::ir::Layers layers;
-  auto* x0 = layers.data("x0", {16, 32});
+  std::vector<int64_t> shape = {16, 32};
+  auto* x0 = layers.data("x0", shape);
   auto* tmp_0 = layers.sigmoid(x0);
-  tmp_0->SetShape({16, 32});
-  auto* x1 = layers.data("x1", {16, 32});
+  auto* x1 = layers.data("x1", shape);
   auto* tmp_1 = layers.elementwise_mul(tmp_0, x1);
-  tmp_1->SetShape({16, 32});
-  auto* x2 = layers.data("x2", {16, 32});
+  auto* x2 = layers.data("x2", shape);
   auto* tmp_2 = layers.tanh(x2);
-  tmp_2->SetShape({16, 32});
-  auto* x3 = layers.data("x3", {16, 32});
+  auto* x3 = layers.data("x3", shape);
   auto* tmp_3 = layers.elementwise_mul(x3, tmp_2);
-  tmp_3->SetShape({16, 32});
-  layers.elementwise_add(tmp_1, tmp_3);
+  auto* tmp_4 = layers.elementwise_add(tmp_1, tmp_3);
+
+  std::vector<VarDesc*> elementwise_vars = {
+      tmp_0, tmp_1, tmp_2, tmp_3, tmp_4};
+  for (auto* var : elementwise_vars) {
+    var->SetShape(shape);
+  }
 
   if (backward) {
-    layers.backward();
+    layers.backward({tmp_4});
   }
 
   std::unique_ptr<Graph> graph(
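[Review note] The corrected reference function computes the relu gradient from the forward output rather than the input: for out = max(x, 0), dx = dout when out > 0 and 0 otherwise, so x itself is not needed. A minimal standalone check of that rule (illustrative only, not part of the patch):

    #include <algorithm>
    #include <cassert>

    inline float relu(float x) { return std::max(x, 0.0f); }

    // Gradient expressed with the forward output only:
    // dx = dout * (out > 0 ? 1 : 0).
    inline float relu_grad_dx(float out, float dout) {
      return out > 0 ? dout : 0.0f;
    }

    int main() {
      assert(relu_grad_dx(relu(-2.0f), 3.0f) == 0.0f);  // negative input blocks the gradient
      assert(relu_grad_dx(relu(2.0f), 3.0f) == 3.0f);   // positive input passes dout through
      return 0;
    }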
diff --git a/paddle/fluid/framework/ir/fusion_group/fusion_group_pass_tester.cc b/paddle/fluid/framework/ir/fusion_group/fusion_group_pass_tester.cc
index a023897d121ec3df019c8f796fa98b56f5d69b43..172ec0c0ee84d06440626caad4771199e5771aae 100644
--- a/paddle/fluid/framework/ir/fusion_group/fusion_group_pass_tester.cc
+++ b/paddle/fluid/framework/ir/fusion_group/fusion_group_pass_tester.cc
@@ -22,17 +22,15 @@ namespace paddle {
 namespace framework {
 namespace ir {
 
-void VisualizeGraph(std::unique_ptr<Graph> graph, std::string graph_viz_path) {
+void VisualizeGraph(std::unique_ptr<Graph>* graph, std::string graph_viz_path) {
   // Insert a graph_viz_pass to transform the graph to a .dot file.
   // It can be used for debug.
   auto graph_viz_pass = PassRegistry::Instance().Get("graph_viz_pass");
   graph_viz_pass->Set("graph_viz_path", new std::string(graph_viz_path));
-  graph.reset(graph_viz_pass->Apply(graph.release()));
+  graph->reset(graph_viz_pass->Apply(graph->release()));
 }
 
-TEST(FusionGroupPass, elementwise_list) {
-  fusion_group::OperationMap::Init();
-
+std::unique_ptr<Graph> BuildElementwiseListGraph(bool backward = false) {
   //        inputs            operator            output
   // --------------------------------------------------------
   // (x, y)                   mul              -> tmp_0
@@ -42,34 +40,33 @@
   //
   // Expression: tmp_3 = relu(mul(x, y) + z) + w
   Layers layers;
+  std::vector<int64_t> shape = {16, 32};
   auto* x = layers.data("x", {16, 16});
   auto* y = layers.data("y", {16, 32});
   auto* tmp_0 = layers.mul(x, y);
-  tmp_0->SetShape({16, 32});
-  auto* z = layers.data("z", {16, 32});
+  auto* z = layers.data("z", shape);
   auto* tmp_1 = layers.elementwise_add(tmp_0, z);
   auto* tmp_2 = layers.relu(tmp_1);
-  tmp_2->SetShape({16, 32});
-  auto* w = layers.data("w", {16, 32});
-  layers.elementwise_add(tmp_2, w);
-
-  std::unique_ptr<Graph> graph(new Graph(layers.main_program()));
-  // VisualizeGraph(graph, "00_elementwise_list.dot");
-
-  auto fusion_group_pass = PassRegistry::Instance().Get("fusion_group_pass");
-  VLOG(3) << DebugString(graph);
+  auto* w = layers.data("w", shape);
+  auto* tmp_3 = layers.elementwise_add(tmp_2, w);
+  std::vector<VarDesc*> elementwise_vars = {tmp_0, z, tmp_1, tmp_2, w, tmp_3};
+  for (auto* var : elementwise_vars) {
+    var->SetShape(shape);
+  }
 
-  graph.reset(fusion_group_pass->Apply(graph.release()));
-  // VisualizeGraph(graph, "01_elementwise_list.fusion_group.dot");
-  int num_fusion_group_ops = GetNumOpNodes(graph, "fusion_group");
-  VLOG(3) << DebugString(graph);
+  if (backward) {
+    layers.backward({tmp_3});
+  }
 
-  PADDLE_ENFORCE_EQ(num_fusion_group_ops, 1);
+  std::unique_ptr<Graph> graph(new Graph(layers.main_program()));
+#ifdef __clang__
+  return graph;
+#else
+  return std::move(graph);
+#endif
 }
 
-TEST(FusionGroupPass, elementwise_tree) {
-  fusion_group::OperationMap::Init();
-
+std::unique_ptr<Graph> BuildElementwiseTreeGraph(bool backward = false) {
   //        inputs            operator            output
   // --------------------------------------------------------
   // (x0, y0)                 mul              -> tmp_0
@@ -88,53 +85,72 @@
   // tmp_9 = tanh(x4) * sigmoid(x5)
   // tmp_10 = mul(tmp_6, tmp_9)
   Layers layers;
+  std::vector<int64_t> shape = {16, 32};
   auto* x0 = layers.data("x0", {16, 16});
   auto* y0 = layers.data("y0", {16, 32});
   auto* tmp_0 = layers.mul(x0, y0);
-  tmp_0->SetShape({16, 32});
-
-  auto* x1 = layers.data("x1", {16, 32});
+  auto* x1 = layers.data("x1", shape);
   auto* tmp_1 = layers.sigmoid(x1);
-  tmp_1->SetShape({16, 32});
-  auto* tmp_2 = layers.elementwise_mul(tmp_0, tmp_1);
-  tmp_2->SetShape({16, 32});
-
-  auto* x2 = layers.data("x2", {16, 32});
+  auto* tmp_2 = layers.elementwise_mul(tmp_0, tmp_1);
+  auto* x2 = layers.data("x2", shape);
   auto* tmp_3 = layers.sigmoid(x2);
-  tmp_3->SetShape({16, 32});
-  auto* x3 = layers.data("x3", {16, 32});
+  auto* x3 = layers.data("x3", shape);
   auto* tmp_4 = layers.tanh(x3);
-  tmp_4->SetShape({16, 32});
   auto* tmp_5 = layers.elementwise_mul(tmp_3, tmp_4);
-  tmp_5->SetShape({16, 32});
-
   auto* tmp_6 = layers.elementwise_add(tmp_2, tmp_5);
-  tmp_6->SetShape({16, 32});
-
-  auto* x4 = layers.data("x4", {16, 32});
+  auto* x4 = layers.data("x4", shape);
   auto* tmp_7 = layers.tanh(x4);
-  tmp_7->SetShape({16, 32});
-  auto* x5 = layers.data("x5", {16, 32});
+  auto* x5 = layers.data("x5", shape);
   auto* tmp_8 = layers.sigmoid(x5);
-  tmp_8->SetShape({16, 32});
-
   auto* tmp_9 = layers.elementwise_mul(tmp_7, tmp_8);
-  tmp_9->SetShape({16, 32});
-  layers.mul(tmp_6, tmp_9);
+  auto* tmp_10 = layers.mul(tmp_6, tmp_9);
+
+  std::vector<VarDesc*> elementwise_vars = {tmp_0, tmp_1, tmp_2, tmp_3, tmp_4,
+                                            tmp_5, tmp_6, tmp_7, tmp_8, tmp_9};
+  for (auto* var : elementwise_vars) {
+    var->SetShape(shape);
+  }
+
+  if (backward) {
+    layers.backward({tmp_10});
+  }
 
   std::unique_ptr<Graph> graph(new Graph(layers.main_program()));
-  // VisualizeGraph(graph, "00_elementwise_tree.dot");
+#ifdef __clang__
+  return graph;
+#else
+  return std::move(graph);
+#endif
+}
 
-  auto fusion_group_pass = PassRegistry::Instance().Get("fusion_group_pass");
+int TestMain(std::unique_ptr<Graph> graph, std::string prefix) {
+  // VisualizeGraph(&graph, prefix + ".dot");
+  auto pass = PassRegistry::Instance().Get("fusion_group_pass");
+  pass->Set("use_gpu", new bool(true));
   VLOG(3) << DebugString(graph);
-  graph.reset(fusion_group_pass->Apply(graph.release()));
-  // VisualizeGraph(graph, "01_elementwise_tree.fusion_group.dot");
+  graph.reset(pass->Apply(graph.release()));
+  // VisualizeGraph(&graph, prefix + ".fusion_group.dot");
   int num_fusion_group_ops = GetNumOpNodes(graph, "fusion_group");
   VLOG(3) << DebugString(graph);
 
-  PADDLE_ENFORCE_EQ(num_fusion_group_ops, 2);
+  return num_fusion_group_ops;
+}
+
+TEST(FusionGroupPass, elementwise_list) {
+  fusion_group::OperationMap::Init();
+
+  std::unique_ptr<Graph> graph = BuildElementwiseListGraph(false);
+  int num_fusion_group_ops = TestMain(std::move(graph), "elementwise_list");
+  EXPECT_EQ(num_fusion_group_ops, 1);
+}
+
+TEST(FusionGroupPass, elementwise_tree) {
+  fusion_group::OperationMap::Init();
+
+  std::unique_ptr<Graph> graph = BuildElementwiseTreeGraph(false);
+  int num_fusion_group_ops = TestMain(std::move(graph), "elementwise_tree");
+  EXPECT_EQ(num_fusion_group_ops, 2);
 }
 
 }  // namespace ir
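[Review note] The builder functions above return std::unique_ptr<Graph> behind an #ifdef __clang__ switch. A sketch of the same pattern in isolation, under the assumption that the explicit std::move is there for older compilers that do not move the returned local implicitly, while clang would warn that the move is redundant:

    #include <memory>

    struct Graph {
      int num_nodes = 0;
    };

    std::unique_ptr<Graph> BuildGraph() {
      std::unique_ptr<Graph> graph(new Graph());
      graph->num_nodes = 3;
    #ifdef __clang__
      return graph;             // implicit move; std::move here would trigger a warning
    #else
      return std::move(graph);  // explicit move for toolchains that need it
    #endif
    }

    int main() { return BuildGraph()->num_nodes == 3 ? 0 : 1; }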
diff --git a/paddle/fluid/framework/ir/fusion_group/operation.cc b/paddle/fluid/framework/ir/fusion_group/operation.cc
index ad89cc13cb2a1c65b2005f530454c0aab2bfa467..912044611f6f6de00a9ba0c7fbe2d36eb5cd4ceb 100644
--- a/paddle/fluid/framework/ir/fusion_group/operation.cc
+++ b/paddle/fluid/framework/ir/fusion_group/operation.cc
@@ -43,7 +43,11 @@ void OperationMap::Insert(int type, int num_operands, std::string op_type,
                           std::vector<std::string> input_names,
                           std::vector<std::string> output_names) {
   Operation op(type, num_operands, op_type, {expr}, input_names, output_names);
-  PADDLE_ENFORCE_EQ(op.IsValid(), true, "Operation %s is invalid.", op_type);
+  PADDLE_ENFORCE_EQ(op.IsValid(), true,
+                    platform::errors::InvalidArgument(
+                        "Operation %s is invalid. Please set correct "
+                        "expression for forward calculation.",
+                        op_type));
   operations_[op_type] = op;
 
   if (grad_exprs.size() > 0U) {
@@ -63,8 +67,11 @@ void OperationMap::Insert(int type, int num_operands, std::string op_type,
     }
     Operation grad_op(type, num_operands, grad_op_type, grad_exprs,
                       grad_input_names, grad_output_names);
-    PADDLE_ENFORCE_EQ(grad_op.IsValid(), true, "Operation %s is invalid.",
-                      grad_op_type);
+    PADDLE_ENFORCE_EQ(grad_op.IsValid(), true,
+                      platform::errors::InvalidArgument(
+                          "Operation %s is invalid. Please set correct "
+                          "expression for backward calculation.",
+                          grad_op_type));
     operations_[grad_op_type] = grad_op;
   }
 }
@@ -83,8 +90,8 @@ void OperationMap::InsertUnaryElementwiseOperations() {
 
   // relu:
   //  out = f(x) = x > 0 ? x : 0
-  //  dx = dout * (out > 0 ? 1 : 0) = dout * (x > 0 ? 1 : 0)
-  insert_handler("relu", "real_max(${0}, 0)", {"${0} > 0 ? ${2} : 0"});
+  //  dx = dout * (out > 0 ? 1 : 0)
+  insert_handler("relu", "real_max(${0}, 0)", {"${1} > 0 ? ${2} : 0"});
   // sigmoid:
   //  out = f(x) = 1.0 / (1.0 + exp(-x))
   //  dx = dout * out * (1 - out)
diff --git a/paddle/fluid/framework/ir/fusion_group/operation.h b/paddle/fluid/framework/ir/fusion_group/operation.h
index 3145e3052efb6b5bbd0fc25e75f77e9590b9b146..d23bea8a437090d9effb8259df0ab120e1b66e08 100644
--- a/paddle/fluid/framework/ir/fusion_group/operation.h
+++ b/paddle/fluid/framework/ir/fusion_group/operation.h
@@ -70,7 +70,10 @@ class OperationMap {
   OperationMap();
 
   static OperationMap& Instance() {
-    PADDLE_ENFORCE_NOT_NULL(map, "Need to initialize OperationMap first!");
+    PADDLE_ENFORCE_NOT_NULL(
+        map, platform::errors::PreconditionNotMet(
+                 "Please initialize OperationMap first, by calling "
+                 "framework::fusion_group::OperationMap::Init()!"));
     return *map;
   }
diff --git a/paddle/fluid/framework/ir/pass_tester_helper.h b/paddle/fluid/framework/ir/pass_tester_helper.h
index a89f0c3b661db7d9b9794d6edb792654846d09f5..3c74612552597ff469a3e32ccb0f59891bdf437d 100644
--- a/paddle/fluid/framework/ir/pass_tester_helper.h
+++ b/paddle/fluid/framework/ir/pass_tester_helper.h
@@ -270,9 +270,19 @@ struct Layers {
     return outs;
   }
 
-  void backward() {
+  void backward(std::vector<VarDesc*> targets) {
+    // This function is designed to simulate the structure of a training
+    // program, but it is constructed differently from the actual program.
     BlockDesc* block = program_.MutableBlock(0);
     std::vector<OpDesc*> forward_ops = block->AllOps();
+    for (auto* var : targets) {
+      OpDesc* none_op = block->AppendOp();
+      none_op->SetType("none");
+      none_op->SetInput("X", {var->Name()});
+      VarDesc* grad_var =
+          lod_tensor(GradVarName(var->Name()), var->GetShape(), false);
+      none_op->SetOutput("Out", {grad_var->Name()});
+    }
     for (int i = forward_ops.size() - 1; i >= 0; --i) {
       OpDesc* op = forward_ops[i];
       OpDesc* grad_op = block->AppendOp();
@@ -428,8 +438,21 @@ static std::string DebugString(Node* node) {
       is_first = false;
     }
     os << "}.";
-  } else if (node->IsVar() && node->Var()) {
-    os << "Node(" << node->Name() << "), inputs:{";
+  } else {
+    os << "Node(" << node->Name();
+    if (node->IsVar() && node->Var()) {
+      os << "{";
+      bool is_first = true;
+      for (auto dim : node->Var()->GetShape()) {
+        if (!is_first) {
+          os << "x";
+        }
+        os << dim;
+        is_first = false;
+      }
+      os << "}";
+    }
+    os << "), inputs:{";
     bool is_first = true;
     for (auto* in : node->inputs) {
       if (!is_first) {
@@ -477,12 +500,16 @@ static std::string DebugString(const std::unordered_set<Node*>& nodes) {
   return DebugString(vec);
 }
 
-static std::string DebugString(const std::unique_ptr<Graph>& graph) {
+static std::string DebugString(Graph* graph) {
   std::ostringstream os;
   os << "Graph: {\n" << DebugString(graph->Nodes()) << "}\n";
   return os.str();
 }
 
+static std::string DebugString(const std::unique_ptr<Graph>& graph) {
+  return DebugString(graph.get());
+}
+
 static int GetNumOpNodes(const std::unique_ptr<Graph>& graph,
                          std::string op_type) {
   int num_nodes = 0;
diff --git a/paddle/fluid/platform/device_code_test.cc b/paddle/fluid/platform/device_code_test.cc
index aa6bce6f1e54f86098b6df729ff00373ff0a638f..9331532058095df61b0a07d78c9ebb14e8be8ce8 100644
--- a/paddle/fluid/platform/device_code_test.cc
+++ b/paddle/fluid/platform/device_code_test.cc
@@ -85,7 +85,8 @@ TEST(DeviceCode, cuda) {
 }
 
 TEST(DeviceCodePool, cuda) {
-  if (!paddle::platform::dynload::HasNVRTC()) {
+  if (!paddle::platform::dynload::HasNVRTC() ||
+      !paddle::platform::dynload::HasCUDADriver()) {
     return;
   }
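[Review note] The backward({targets}) helper added to pass_tester_helper.h above relies on Paddle's gradient-naming convention: the gradient of a variable "x" is a separate variable named "x@GRAD" (the names x0@GRAD, tmp_2@GRAD, etc. in the test comments follow the same rule). A minimal sketch of that naming step alone (hypothetical standalone helper, not the Paddle API):

    #include <iostream>
    #include <string>
    #include <vector>

    // Gradient-variable name: forward name plus the "@GRAD" suffix.
    std::string GradVarName(const std::string& name) { return name + "@GRAD"; }

    int main() {
      std::vector<std::string> targets = {"tmp_4", "tmp_3"};
      for (const auto& t : targets) {
        // The test helper appends a "none" op that consumes t and produces
        // t@GRAD, then emits *_grad ops for the forward ops in reverse order.
        std::cout << "none(X=" << t << ") -> " << GradVarName(t) << "\n";
      }
      return 0;
    }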