From 050fd16876f54ae4aad9885a3ea6edefa6faa34d Mon Sep 17 00:00:00 2001 From: Allen Guo Date: Wed, 12 Jan 2022 19:27:12 +0800 Subject: [PATCH] [IPU] add more ops (#38831) * support more ops * Co-authored-by: Xiaobing Wang Co-authored-by: Allen Guo Co-authored-by: Zhixin Yao Co-authored-by: Haicheng Jiang Co-authored-by: Han Zhao * add authors Co-authored-by: Xiaobing Wang Co-authored-by: Allen Guo Co-authored-by: Zhixin Yao Co-authored-by: Haicheng Jiang Co-authored-by: Han Zhao * update date Co-authored-by: Xiaobing Wang Co-authored-by: Zhixin Yao Co-authored-by: Haicheng Jiang Co-authored-by: Han Zhao --- .../ir/ipu/popart_canonicalization_pass.cc | 1 - .../popart_canonicalization/activation_ops.cc | 32 ++- .../canonicalization_utils.cc | 11 + .../canonicalization_utils.h | 6 +- .../ipu/popart_canonicalization/logic_ops.cc | 14 ++ .../ipu/popart_canonicalization/math_ops.cc | 225 +++++++++++++----- .../ipu/popart_canonicalization/nn_ops.cc | 23 +- .../ipu/popart_canonicalization/op_builder.cc | 34 ++- .../ipu/popart_canonicalization/op_builder.h | 9 +- .../ipu/popart_canonicalization/other_ops.cc | 65 +++++ .../ipu/popart_canonicalization/search_ops.cc | 72 +++--- .../ipu/popart_canonicalization/tensor_ops.cc | 159 +++++++++++-- 12 files changed, 517 insertions(+), 134 deletions(-) create mode 100644 paddle/fluid/platform/device/ipu/popart_canonicalization/other_ops.cc diff --git a/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc b/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc index c97b7fd5bcb..d2d76f9a9a2 100644 --- a/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc +++ b/paddle/fluid/framework/ir/ipu/popart_canonicalization_pass.cc @@ -16,7 +16,6 @@ #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h" -#include "paddle/fluid/platform/device/ipu/popart_canonicalization/post_canonicalization.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc index 5793c4c0e3c..fc2f1e476b9 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc @@ -48,7 +48,37 @@ Node *sqrt_handler(Graph *graph, Node *node) { } Node *gelu_handler(Graph *graph, Node *node) { - return activation_op_handler(graph, node, "popart_gelu_v2"); + auto *op = node->Op(); + auto approximate_ = BOOST_GET_CONST(bool, op->GetAttr("approximate")); + if (approximate_) { + return activation_op_handler(graph, node, "popart_gelu_v2"); + } else { + auto sqrt2 = CreateConst(graph, node, {}, {}, + {{"value", std::vector{1.4142135623730951}}, + {"dims", std::vector{1}}, + {"dtype", GetOutputVarDtype(node)}}); + auto zero_point_five = + CreateConst(graph, node, {}, {}, {{"value", std::vector{0.5}}, + {"dims", std::vector{1}}, + {"dtype", GetOutputVarDtype(node)}}); + auto one = + CreateConst(graph, node, {}, {}, {{"value", std::vector{1}}, + {"dims", std::vector{1}}, + {"dtype", GetOutputVarDtype(node)}}); + auto div = + CreateBaseOp(graph, node, "popart_div", + {GetInputVarNode("X", node), sqrt2->outputs[0]}, {}, {}); + auto erf = + CreateBaseOp(graph, node, "popart_erf", {div->outputs[0]}, {}, {}); + auto add = CreateBaseOp(graph, node, "popart_add", + {erf->outputs[0], one->outputs[0]}, {}, {}); + auto mul1 = + CreateBaseOp(graph, node, 
"popart_mul", + {GetInputVarNode("X", node), add->outputs[0]}, {}, {}); + return CreateBaseOp(graph, node, "popart_mul", + {mul1->outputs[0], zero_point_five->outputs[0]}, + {GetOutputVarNode("Out", node)}, {}); + } } Node *log_softmax_handler(Graph *graph, Node *node) { diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.cc index d46fc55ec6c..3d22f75d345 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.cc @@ -180,6 +180,17 @@ const bool is_float_equal(float a, float b, float eps) { return std::fabs(a - b) <= eps; } +const int GetOutputVarDtype(const Node *node, const std::string &output_name) { + auto out_node = GetOutputVarNode(output_name, node); + PADDLE_ENFORCE_NOT_NULL(out_node, platform::errors::Unavailable( + "Node's out node does not exist.")); + auto var = out_node->Var(); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::Unavailable("Node is not a variable.")); + auto proto_var_type = var->GetDataType(); + return VarType2OnnxDtype(proto_var_type); +} + } // namespace ipu } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h b/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h index c1b2bd0c8b5..5725ec767a4 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h @@ -23,10 +23,6 @@ namespace paddle { namespace platform { namespace ipu { -using framework::ir::Graph; -using framework::ir::Node; -using framework::OpDesc; - #define REGISTER_HANDLER(name, func) \ static bool __UNUSED_##name = \ paddle::platform::ipu::RegisterHandler(#name, func) @@ -58,6 +54,8 @@ Node *GetOutputVarNodeByVarName(const std::string &var_name, const Node *op_node); const bool is_float_equal(float a, float b, float eps = 1e-8); +const int GetOutputVarDtype(const Node *node, + const std::string &output_name = "Out"); } // namespace ipu } // namespace platform diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/logic_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/logic_ops.cc index 92362ebf5be..c980bb780cf 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/logic_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/logic_ops.cc @@ -28,7 +28,21 @@ Node *equal_handler(Graph *graph, Node *node) { return new_node; } +Node *logical_not_handler(Graph *graph, Node *node) { + return CreateBaseOp(graph, node, "popart_logical_not", + {GetInputVarNode("X", node)}, + {GetOutputVarNode("Out", node)}, {}); +} + +Node *greater_than_handler(Graph *graph, Node *node) { + return CreateBaseOp(graph, node, "popart_greater", + {GetInputVarNode("X", node), GetInputVarNode("Y", node)}, + {GetOutputVarNode("Out", node)}, {}); +} + REGISTER_HANDLER(equal, equal_handler); +REGISTER_HANDLER(logical_not, logical_not_handler); +REGISTER_HANDLER(greater_than, greater_than_handler); } // namespace } // namespace ipu diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc index af7e4d0c7db..67012e8d4b9 100644 --- 
a/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc @@ -41,7 +41,8 @@ Node *pow_handler(Graph *graph, Node *node) { // Op(pow) -> Op(Constant)->Var(const_out)->Op(Pow) auto value_ = BOOST_GET_CONST(float, op->GetAttr("factor")); auto attrs = - MakeConstAttrMapFromValue(value_, {1}, ONNXDataType::FLOAT); + MakeConstAttrMapFromValue(value_, {1}, GetOutputVarDtype(node)); + auto new_node_const = CreateConst(graph, node, {}, {}, attrs); return CreateBaseOp(graph, node, "popart_pow", {GetInputVarNode("X", node), new_node_const->outputs[0]}, @@ -122,16 +123,16 @@ Node *matmul_handler(Graph *graph, Node *node) { y_node = y_node->outputs[0]; } if (is_float_equal(alpha, 1.0)) { + return CreateBaseOp(graph, node, "popart_matmul", {x_node, y_node}, + node->outputs); + } else { auto o_node = CreateBaseOp(graph, node, "popart_matmul", {x_node, y_node}, {}); - auto attr = MakeConstAttrMapFromValue(alpha, {1}, ONNXDataType::FLOAT); + auto attr = MakeConstAttrMapFromValue(alpha, {1}, GetOutputVarDtype(node)); auto const_node = CreateConst(graph, node, {}, {}, attr); return CreateBaseOp(graph, node, "popart_mul", {o_node->outputs[0], const_node->outputs[0]}, node->outputs); - } else { - return CreateBaseOp(graph, node, "popart_matmul", {x_node, y_node}, - node->outputs); } } @@ -141,7 +142,10 @@ Node *sum_handler(Graph *graph, Node *node) { Node *softmax_handler(Graph *graph, Node *node) { auto *op = node->Op(); - auto axis = BOOST_GET_CONST(int, op->GetAttr("axis")); + int axis = -1; + if (op->HasAttr("axis")) { + axis = BOOST_GET_CONST(int, op->GetAttr("axis")); + } return CreateSoftmaxOpset11(graph, node, node->inputs, node->outputs, axis); } @@ -153,42 +157,72 @@ Node *scale_handler(Graph *graph, Node *node) { BOOST_GET_CONST(bool, op->GetAttr("bias_after_scale")); auto data_type_ = GetInputVarNode("X", node)->Var()->GetDataType(); - auto new_node_bias_var = - CreateConst(graph, node, {}, {}, {{"value", std::vector{bias_}}, - {"dims", std::vector{1}}, - {"dtype", ONNXDataType::FLOAT}}); - new_node_bias_var = new_node_bias_var->outputs[0]; - - Node *new_node_scale_var = nullptr; - if (op->HasInput("ScaleTensor") && !op->Input("ScaleTensor").empty()) { - new_node_scale_var = GetInputVarNode("ScaleTensor", node); - } else { - new_node_scale_var = - CreateConst(graph, node, {}, {}, {{"value", std::vector{scale_}}, - {"dims", std::vector{1}}, - {"dtype", ONNXDataType::FLOAT}}); - new_node_scale_var = new_node_scale_var->outputs[0]; - } + auto cast = CreateCast(graph, node, {GetInputVarNode("X", node)}, {}, + static_cast(framework::proto::VarType::FP32)); - // convert to float32 - auto new_node_cast = - CreateCast(graph, node, {GetInputVarNode("X", node)}, {}, - static_cast(framework::proto::VarType::FP32)); Node *result = nullptr; - if (bias_after_scale_) { - auto new_node_mul = - CreateBaseOp(graph, node, "popart_mul", - {new_node_cast->outputs[0], new_node_scale_var}, {}, {}); - result = - CreateBaseOp(graph, node, "popart_add", - {new_node_mul->outputs[0], new_node_bias_var}, {}, {}); + if (op->HasInput("ScaleTensor") && !op->Input("ScaleTensor").empty()) { + auto scale = GetInputVarNode("ScaleTensor", node); + if (is_float_equal(bias_, 0.0)) { + result = CreateBaseOp(graph, node, "popart_mul", + {cast->outputs[0], scale}, {}, {}); + } else { + auto bias = CreateConst(graph, node, {}, {}, + {{"value", std::vector{bias_}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::FLOAT}}); + bias = bias->outputs[0]; 
+ if (bias_after_scale_) { + auto mul = CreateBaseOp(graph, node, "popart_mul", + {cast->outputs[0], scale}, {}, {}); + result = CreateBaseOp(graph, node, "popart_add", + {mul->outputs[0], bias}, {}, {}); + } else { + auto add = CreateBaseOp(graph, node, "popart_add", + {cast->outputs[0], bias}, {}, {}); + result = CreateBaseOp(graph, node, "popart_mul", + {add->outputs[0], scale}, {}, {}); + } + } } else { - auto new_node_add = - CreateBaseOp(graph, node, "popart_add", - {new_node_cast->outputs[0], new_node_bias_var}, {}, {}); - result = - CreateBaseOp(graph, node, "popart_mul", - {new_node_add->outputs[0], new_node_scale_var}, {}, {}); + if (is_float_equal(bias_, 0.0) && is_float_equal(scale_, 1.0)) { + return CreateBaseOp(graph, node, "popart_identity", + {GetInputVarNode("X", node)}, node->outputs, {}); + } else if (is_float_equal(scale_, 1.0)) { + auto bias = CreateConst(graph, node, {}, {}, + {{"value", std::vector{bias_}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::FLOAT}}); + result = CreateBaseOp(graph, node, "popart_add", + {cast->outputs[0], bias->outputs[0]}, {}, {}); + } else if (is_float_equal(bias_, 0.0)) { + auto scale = CreateConst(graph, node, {}, {}, + {{"value", std::vector{scale_}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::FLOAT}}); + result = CreateBaseOp(graph, node, "popart_mul", + {cast->outputs[0], scale->outputs[0]}, {}, {}); + } else { + auto bias = CreateConst(graph, node, {}, {}, + {{"value", std::vector{bias_}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::FLOAT}}); + auto scale = CreateConst(graph, node, {}, {}, + {{"value", std::vector{scale_}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::FLOAT}}); + if (bias_after_scale_) { + auto mul = CreateBaseOp(graph, node, "popart_mul", + {cast->outputs[0], scale->outputs[0]}, {}, {}); + result = CreateBaseOp(graph, node, "popart_add", + {mul->outputs[0], bias->outputs[0]}, {}, {}); + } else { + auto add = CreateBaseOp(graph, node, "popart_add", + {cast->outputs[0], bias->outputs[0]}, {}, {}); + result = CreateBaseOp(graph, node, "popart_mul", + {add->outputs[0], scale->outputs[0]}, {}, {}); + } + } } auto result_after_cast = CreateCast(graph, node, result->outputs, node->outputs, @@ -199,16 +233,27 @@ Node *scale_handler(Graph *graph, Node *node) { Node *cross_entropy2_handler(Graph *graph, Node *node) { auto *op = node->Op(); auto ignoreIndex = BOOST_GET_CONST(int, op->GetAttr("ignore_index")); - auto new_cast = CreateCast(graph, node, {GetInputVarNode("Label", node)}, {}, - framework::proto::VarType::INT32); + Node *new_cast = nullptr; + if (GetInputVarNode("Label", node)->Var()->GetDataType() == + framework::proto::VarType::INT32) { + new_cast = GetInputVarNode("Label", node); + } else { + auto new_cast = CreateCast(graph, node, {GetInputVarNode("Label", node)}, + {}, framework::proto::VarType::INT32); + new_cast = new_cast->outputs[0]; + } auto label_shape_ = GetInputVarNode("Label", node)->Var()->GetShape(); - if (label_shape_.size() == 1) { - return CreateBaseOp(graph, node, "popart_nllloss", - {GetInputVarNode("X", node), new_cast->outputs[0]}, - {GetOutputVarNode("Y", node)}, - { - {"ignoreIndex", ignoreIndex}, - }); + if (label_shape_[label_shape_.size() - 1] != 1) { + auto log = CreateBaseOp(graph, node, "popart_log", + {GetInputVarNode("X", node)}, {}, {}); + return CreateBaseOp( + graph, node, "popart_nllloss_v2", {log->outputs[0], new_cast}, + {GetOutputVarNode("Y", node)}, + { + {"reduction", 2}, // popart::ReductionType::NoReduction + {"ignoreIndex", 
ignoreIndex}, + {"inputIsLogProbability", true}, + }); } else { std::vector new_shape_{label_shape_[0]}; auto const_before_loss = CreateBaseOp( @@ -218,15 +263,19 @@ Node *cross_entropy2_handler(Graph *graph, Node *node) { std::vector{static_cast(new_shape_.size())}}, {"dtype", ONNXDataType::INT64}}); - auto reshape_before_loss = CreateBaseOp( - graph, node, "popart_reshape", - {new_cast->outputs[0], const_before_loss->outputs[0]}, {}, {}); + auto reshape_before_loss = + CreateBaseOp(graph, node, "popart_reshape", + {new_cast, const_before_loss->outputs[0]}, {}, {}); + auto log = CreateBaseOp(graph, node, "popart_log", + {GetInputVarNode("X", node)}, {}, {}); auto nllloss = CreateBaseOp( - graph, node, "popart_nllloss", - {GetInputVarNode("X", node), reshape_before_loss->outputs[0]}, {}, + graph, node, "popart_nllloss_v2", + {log->outputs[0], reshape_before_loss->outputs[0]}, {}, { + {"reduction", 2}, // popart::ReductionType::NoReduction {"ignoreIndex", ignoreIndex}, + {"inputIsLogProbability", true}, }); auto const_after_loss = CreateBaseOp( @@ -244,6 +293,73 @@ Node *cross_entropy2_handler(Graph *graph, Node *node) { } } +Node *cumsum_handler(Graph *graph, Node *node) { + auto *op = node->Op(); + auto exclusive = BOOST_GET_CONST(bool, op->GetAttr("exclusive")); + int64_t popart_exclusive = 1 ? exclusive : 0; + auto reverse = BOOST_GET_CONST(bool, op->GetAttr("reverse")); + int64_t popart_reverse = 1 ? reverse : 0; + auto axis = BOOST_GET_CONST(int, op->GetAttr("axis")); + auto axis_node = + CreateConst(graph, node, {}, {}, {{"value", std::vector{axis}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::INT64}}); + return CreateBaseOp( + graph, node, "popart_cumsum", + {GetInputVarNode("X", node), axis_node->outputs[0]}, + {GetOutputVarNode("Out", node)}, + {{"exclusive", popart_exclusive}, {"reverse", popart_reverse}}); +} + +Node *matmul_v2_handler(Graph *graph, Node *node) { + auto *op = node->Op(); + auto transpose_x = BOOST_GET_CONST(bool, op->GetAttr("trans_x")); + auto transpose_y = BOOST_GET_CONST(bool, op->GetAttr("trans_y")); + auto x_shape = GetInputVarNode("X", node)->Var()->GetShape(); + auto y_shape = GetInputVarNode("Y", node)->Var()->GetShape(); + + std::vector perm; + int x_rank = x_shape.size(); + if (x_rank == 1) { + perm = std::vector{0}; + } else if (x_rank == 2) { + perm = std::vector{1, 0}; + } else if (x_rank == 3) { + perm = std::vector{0, 2, 1}; + } else if (x_rank == 4) { + perm = std::vector{0, 1, 3, 2}; + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "op matmul with input rank == %d", x_rank)); + } + + Node *x_node = GetInputVarNode("X", node); + Node *y_node = GetInputVarNode("Y", node); + + if (transpose_x) { + x_node = CreateBaseOp(graph, node, "popart_transpose", + {GetInputVarNode("X", node)}, {}, {{"perm", perm}}); + x_node = x_node->outputs[0]; + } + if (transpose_y) { + y_node = CreateBaseOp(graph, node, "popart_transpose", + {GetInputVarNode("Y", node)}, {}, {{"perm", perm}}); + y_node = y_node->outputs[0]; + } + + return CreateBaseOp(graph, node, "popart_matmul", {x_node, y_node}, + node->outputs); +} + +Node *arg_max_handler(Graph *graph, Node *node) { + auto *op = node->Op(); + auto axis = BOOST_GET_CONST(int64_t, op->GetAttr("axis")); + return CreateBaseOp(graph, node, "popart_argmax", + {GetInputVarNode("X", node)}, + {GetOutputVarNode("Out", node)}, + {{"axis", axis}, {"keepdims", int64_t{0}}}); +} + REGISTER_HANDLER(mean, mean_handler); REGISTER_HANDLER(pow, pow_handler); REGISTER_HANDLER(mul, mul_handler); @@ -252,6 +368,9 
@@ REGISTER_HANDLER(sum, sum_handler); REGISTER_HANDLER(softmax, softmax_handler); REGISTER_HANDLER(scale, scale_handler); REGISTER_HANDLER(cross_entropy2, cross_entropy2_handler); +REGISTER_HANDLER(cumsum, cumsum_handler); +REGISTER_HANDLER(matmul_v2, matmul_v2_handler); +REGISTER_HANDLER(arg_max, arg_max_handler); } // namespace } // namespace ipu diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc index 58f3e42b738..b7412000107 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc @@ -22,7 +22,7 @@ namespace ipu { namespace { Node *conv2d_handler(Graph *graph, Node *node) { - OpDesc *op = node->Op(); + auto *op = node->Op(); auto dilations_ = BOOST_GET_CONST(std::vector, op->GetAttr("dilations")); auto dilations = std::vector{dilations_.begin(), dilations_.end()}; auto group_ = BOOST_GET_CONST(int, op->GetAttr("groups")); @@ -193,6 +193,21 @@ Node *layer_norm_handler(Graph *graph, Node *node) { auto *op = node->Op(); auto begin_norm_axis_ = BOOST_GET_CONST(int, op->GetAttr("begin_norm_axis")); auto input_shape_ = GetInputVarNode("X", node)->Var()->GetShape(); + auto epsilon_ = BOOST_GET_CONST(float, op->GetAttr("epsilon")); + int64_t groups_ = 1; + + auto groupnorm_attrs_ = + AttributeMap{{"epsilon", epsilon_}, {"num_groups", groups_}}; + + if (input_shape_.size() == 2) { + return CreateBaseOp( + graph, node, "popart_groupnormalization_v2", + {GetInputVarNode("X", node), GetInputVarNode("Scale", node), + GetInputVarNode("Bias", node)}, + {GetOutputVarNode("Y", node), GetOutputVarNode("Mean", node), + GetOutputVarNode("Variance", node)}, + groupnorm_attrs_); + } std::vector norm_shape_{1, 1}; for (int i = 0; i < input_shape_.size(); i++) { @@ -213,10 +228,6 @@ Node *layer_norm_handler(Graph *graph, Node *node) { graph, node, "popart_reshape", {GetInputVarNode("X", node), reshape1_const->outputs[0]}, {}, {}); - auto epsilon_ = BOOST_GET_CONST(float, op->GetAttr("epsilon")); - int64_t groups_ = 1; - auto groupnorm_attrs_ = - AttributeMap{{"epsilon", epsilon_}, {"num_groups", groups_}}; auto out_Y_ = MakeVarNode(graph, node); CreateBaseOp(graph, node, "popart_groupnormalization_v2", {new_node_reshape1->outputs[0], GetInputVarNode("Scale", node), @@ -262,7 +273,7 @@ Node *dropout_handler(Graph *graph, Node *node) { CreateConst(graph, node, {}, {}, {{"value", std::vector{1 - dropout_prob_}}, {"dims", std::vector{1}}, - {"dtype", ONNXDataType::FLOAT}}); + {"dtype", GetOutputVarDtype(node)}}); return CreateBaseOp(graph, node, "popart_mul", {GetInputVarNode("X", node), scale->outputs[0]}, {GetOutputVarNode("Out", node)}, {}); diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc index b7a3a8ca7c6..3ec1999edc4 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc @@ -31,15 +31,31 @@ const std::string GenerateOpName() { } const std::string CreateOpIdentifyId(Node *node) { - // format: op_type|out_var0|out_var1|...|_gen_* + // format: + // if has custom op_namescope: + // {op_namescope}/op_type/_gen_* + // else: + // {op_type}/{out_var0}/{out_var1}/.../_gen_* // this name will be used as op name when exporting onnx model from popart auto op_type = node->Name(); - std::string op_out = ""; - for (auto 
*out_node : node->outputs) { - op_out += "|"; - op_out += out_node->Name(); + std::string op_namescope; + if (node->Op()->HasAttr("op_namescope")) { + op_namescope = + BOOST_GET_CONST(std::string, node->Op()->GetAttr("op_namescope")); + } else { + op_namescope = "/"; + } + + if (op_namescope != "/") { + return {op_namescope + op_type + "/" + GenerateOpName()}; + } else { + std::string op_out = ""; + for (auto *out_node : node->outputs) { + op_out += "/"; + op_out += out_node->Name(); + } + return {op_type + op_out + "/" + GenerateOpName()}; } - return {op_type + op_out + "|" + GenerateOpName()}; } Node *MakeVarNode(Graph *graph, Node *node) { @@ -100,6 +116,12 @@ Node *CreateBaseOp(Graph *graph, Node *node, const std::string &type, if (!new_node->Op()->HasAttr(sIpuStageAttr)) { CopyOpAttr(sIpuStageAttr, node->Op(), new_node->Op()); } + if (node->Op()->HasAttr(sMatmulSerializeFactor)) { + CopyOpAttr(sMatmulSerializeFactor, node->Op(), new_node->Op()); + } + if (node->Op()->HasAttr(sMatmulSerializeMode)) { + CopyOpAttr(sMatmulSerializeMode, node->Op(), new_node->Op()); + } { new_node->Op()->SetAttr(sOpIdentifyIdAttr, CreateOpIdentifyId(node)); new_node->Op()->Flush(); diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h b/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h index 7e70e56ef91..de3788e437a 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h @@ -14,15 +14,16 @@ #pragma once -#include "paddle/fluid/platform/device/ipu/common.h" +#include "paddle/fluid/platform/device/ipu/ipu_names.h" #include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h" +using paddle::framework::AttributeMap; +using paddle::framework::Attribute; + namespace paddle { namespace platform { namespace ipu { -using paddle::framework::AttributeMap; - template AttributeMap MakeConstAttrMap(std::vector value, std::vector dims, int dtype) { @@ -56,7 +57,7 @@ Node *CreateConst(Graph *graph, Node *node, const std::vector &inputs, const std::vector &outputs, const AttributeMap &attrs); -// otype is proto::VarType::Type +// otype is framework::proto::VarType::Type Node *CreateCast(Graph *graph, Node *node, const std::vector &inputs, const std::vector &outputs, const int otype); diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/other_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/other_ops.cc new file mode 100644 index 00000000000..0919afef4d8 --- /dev/null +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/other_ops.cc @@ -0,0 +1,65 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/platform/device/ipu/popart_canonicalization/canonicalization_utils.h" +#include "paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace platform { +namespace ipu { +namespace { + +Node *custom_op_handler(Graph *graph, Node *node) { + auto *op = node->Op(); + auto attrs = op->GetAttrMap(); + attrs.insert({"__op_type", node->Op()->Type()}); + auto new_node = CreateBaseOp(graph, node, "popart_custom_op", node->inputs, + node->outputs, attrs); + return new_node; +} + +Node *print_handler(Graph *graph, Node *node) { + auto *op = node->Op(); + auto print_phase = BOOST_GET_CONST(std::string, op->GetAttr("print_phase")); + int64_t print_gradient = 0; + if (print_phase != "forward") { + print_gradient = 1; + } + auto title = BOOST_GET_CONST(std::string, op->GetAttr("message")); + if (title.empty()) { + title = GetInputVarNode("In", node)->Var()->Name(); + } + auto attrs = + AttributeMap{{"print_gradient", print_gradient}, {"title", title}}; + return CreateBaseOp(graph, node, "popart_printtensor", node->inputs, + node->outputs, attrs); +} + +Node *popart_optimizer_handler(Graph *graph, Node *node) { return nullptr; } + +Node *checkpointoutput_handler(Graph *graph, Node *node) { + return CreateBaseOp(graph, node, "popart_checkpointoutput", node->inputs, + node->outputs); +} + +REGISTER_HANDLER(custom_op, custom_op_handler); +REGISTER_HANDLER(print, print_handler); +REGISTER_HANDLER(popart_optimizer, popart_optimizer_handler); +REGISTER_HANDLER(checkpointoutput, checkpointoutput_handler); + +} // namespace +} // namespace ipu +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc index e90faa502ec..662660c23b4 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc @@ -21,24 +21,24 @@ namespace platform { namespace ipu { namespace { -Node *topK_op_handler(Graph *graph, Node *node) { - VLOG(10) << "[topK_op_handler] entering to handler ..."; +Node *topk_handler(Graph *graph, Node *node) { auto *op = node->Op(); auto attrs = AttributeMap{}; - int axis_32INT = -1; + + int axis_ = -1; if (op->HasAttr("axis")) { - axis_32INT = BOOST_GET_CONST(int, op->GetAttr("axis")); + axis_ = BOOST_GET_CONST(int, op->GetAttr("axis")); } - if (axis_32INT == -1) { + if (axis_ == -1) { auto shape = GetInputVarNode("X", node)->Var()->GetShape(); int rank = shape.size(); if (rank < 1) { PADDLE_THROW(platform::errors::InvalidArgument( "The dimension of the shape of topK input should be large than 1")); } - axis_32INT = rank - 1; + axis_ = rank - 1; } - int64_t axis = int64_t{axis_32INT}; + int64_t axis = int64_t{axis_}; attrs.emplace("axis", axis); bool largest = true; @@ -63,45 +63,31 @@ Node *topK_op_handler(Graph *graph, Node *node) { attrs.emplace("sorted", 0); } - std::vector inputs = node->inputs; - if (node->inputs.size() == 2) { - // Input X tensor and K const tensor - VLOG(10) << "[topK_op_handler] get 2 input tensors."; - inputs[0] = node->inputs[1]; // K_t - VLOG(10) << "[topK_op_handler] input node(" << inputs[0]->Var()->Name() - << ")"; - inputs[1] = node->inputs[0]; // X - VLOG(10) << "[topK_op_handler] input node(" << inputs[1]->Var()->Name() - << ")"; - } else if (node->inputs.size() == 1) { - // Input X tensor with k integer - VLOG(10) << 
"[topK_op_handler] get 1 input tensor."; - int k_32INT = BOOST_GET_CONST(int, op->GetAttr("k")); - int64_t k = int64_t{k_32INT}; - attrs.emplace("k", k); - } - // show output node dtype - for (auto *o_node : node->outputs) { - auto *var = o_node->Var(); - // see framework.pb.h - // VarType_Type_INT64 = 3, - // VarType_Type_FP32 = 5, - auto dtype = var->GetDataType(); - if (dtype == 3) { - // poplar does not support int64_t - var->SetDataType(framework::proto::VarType::INT32); - } - std::string name = var->Name(); - VLOG(10) << "[topK_op_handler] output node(" << name - << ") dtype : " << dtype; + Node *var_x = GetInputVarNode("X", node); + Node *var_k = nullptr; + if (op->HasInput("K") && !op->Input("K").empty()) { + var_k = GetInputVarNode("K", node); + } else { + auto k = BOOST_GET_CONST(int, op->GetAttr("k")); + auto *op_k = + CreateConst(graph, node, {}, {}, {{"value", std::vector{k}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::INT64}}); + var_k = op_k->outputs[0]; } - VLOG(10) << "[topK_op_handler] leave the handler."; - return CreateBaseOp(graph, node, "popart_topk", inputs, - {node->outputs[1], node->outputs[0]}, attrs); + + auto *var_i = MakeVarNode(graph, node); + CreateBaseOp(graph, node, "popart_topk", {var_x, var_k}, + {GetOutputVarNode("Out", node), var_i}, + {{"axis", int64_t{axis}}, + {"largest", int64_t{largest}}, + {"sorted", int64_t{sorted}}}); + return CreateCast(graph, node, {var_i}, {GetOutputVarNode("Indices", node)}, + static_cast(framework::proto::VarType::INT32)); } -REGISTER_HANDLER(top_k, topK_op_handler); -REGISTER_HANDLER(top_k_v2, topK_op_handler); +REGISTER_HANDLER(top_k, topk_handler); +REGISTER_HANDLER(top_k_v2, topk_handler); } // namespace } // namespace ipu diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc index f1f77b53e46..296668890eb 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc @@ -21,9 +21,6 @@ namespace platform { namespace ipu { namespace { -using framework::Attribute; -using framework::AttributeMap; - Node *fill_constant_handler(Graph *graph, Node *node) { auto *op = node->Op(); if (op->HasInput("ShapeTensor") && !op->Input("ShapeTensor").empty()) { @@ -133,6 +130,14 @@ Node *reshape_handler(Graph *graph, Node *node) { return new_node_reshape; } +Node *flatten2_handler(Graph *graph, Node *node) { + auto *op = node->Op(); + auto axis = BOOST_GET_CONST(int, op->GetAttr("axis")); + return CreateBaseOp( + graph, node, "popart_flatten", {GetInputVarNode("X", node)}, + {GetOutputVarNode("Out", node)}, {{"axis", int64_t(axis)}}); +} + Node *gather_handler(Graph *graph, Node *node) { auto new_node_gather = CreateBaseOp(graph, node, "popart_gather", @@ -169,7 +174,8 @@ Node *cast_handler(Graph *graph, Node *node) { return new_node_cast; } -Node *lookup_table_handler(Graph *graph, Node *node) { +Node *lookup_table_op_handler(Graph *graph, Node *node, + const std::string &type) { auto *op = node->Op(); auto padding_idx_ = BOOST_GET_CONST(int64_t, op->GetAttr("padding_idx")); auto w_shape_ = GetInputVarNode("W", node)->Var()->GetShape(); @@ -183,7 +189,7 @@ Node *lookup_table_handler(Graph *graph, Node *node) { auto concat_const = CreateConst(graph, node, {}, {}, {{"value", const_value_}, {"dims", const_shape_}, - {"dtype", ONNXDataType::FLOAT}}); + {"dtype", GetOutputVarDtype(node)}}); auto axes = CreateConst(graph, node, {}, 
{}, {{"value", std::vector{0}}, {"dims", std::vector{1}}, @@ -247,16 +253,28 @@ Node *lookup_table_handler(Graph *graph, Node *node) { w_node = GetInputVarNode("W", node); } - auto squeeze = CreateBaseOp(graph, node, "popart_squeeze", - {GetInputVarNode("Ids", node)}, {}, - {{"axes", std::vector{-1}}}); + // lookup_table and lookup_table_v2 + auto ids = GetInputVarNode("Ids", node); + if (type == "v1") { + ids = CreateBaseOp(graph, node, "popart_squeeze", + {GetInputVarNode("Ids", node)}, {}, + {{"axes", std::vector{-1}}}); + ids = ids->outputs[0]; + } - auto gather = - CreateBaseOp(graph, node, "popart_gather", {w_node, squeeze->outputs[0]}, - {GetOutputVarNode("Out", node)}, {}); + auto gather = CreateBaseOp(graph, node, "popart_gather", {w_node, ids}, + {GetOutputVarNode("Out", node)}, {}); return gather; } +Node *lookup_table_handler(Graph *graph, Node *node) { + return lookup_table_op_handler(graph, node, "v1"); +} + +Node *lookup_table_v2_handler(Graph *graph, Node *node) { + return lookup_table_op_handler(graph, node, "v2"); +} + Node *unsqueeze_handler(Graph *graph, Node *node) { auto *op = node->Op(); auto axes_ = BOOST_GET_CONST(std::vector, op->GetAttr("axes")); @@ -336,11 +354,32 @@ Node *slice_handler(Graph *graph, Node *node) { auto attr = MakeConstAttrMap(axes_, {dim}, ONNXDataType::INT32); axes = CreateConst(graph, node, {}, {}, attr); } - auto new_node = CreateBaseOp( - graph, node, "popart_slice", - {GetInputVarNode("Input", node), starts, ends, axes->outputs[0]}, - node->outputs); - return new_node; + + auto decrease_axis_ = + BOOST_GET_CONST(std::vector, op->GetAttr("decrease_axis")); + auto input_shape_ = GetInputVarNode("Input", node)->Var()->GetShape(); + auto output_shape_ = GetOutputVarNode("Out", node)->Var()->GetShape(); + if (decrease_axis_.size() == 0) { + return CreateBaseOp( + graph, node, "popart_slice", + {GetInputVarNode("Input", node), starts, ends, axes->outputs[0]}, + node->outputs); + } else if (output_shape_ == std::vector{0} || + input_shape_.size() > output_shape_.size()) { + auto slice = CreateBaseOp( + graph, node, "popart_slice", + {GetInputVarNode("Input", node), starts, ends, axes->outputs[0]}, {}, + {}); + return CreateBaseOp(graph, node, "popart_squeeze", {slice->outputs[0]}, + {GetOutputVarNode("Out", node)}, + {{"axes", std::vector{decrease_axis_.begin(), + decrease_axis_.end()}}}); + } else { + return CreateBaseOp( + graph, node, "popart_slice", + {GetInputVarNode("Input", node), starts, ends, axes->outputs[0]}, + node->outputs); + } } Node *expand_handler(Graph *graph, Node *node) { @@ -373,11 +412,94 @@ Node *expand_handler(Graph *graph, Node *node) { return new_node; } +Node *assign_handler(Graph *graph, Node *node) { + return CreateBaseOp(graph, node, "popart_identity", + {GetInputVarNode("X", node)}, + {GetOutputVarNode("Out", node)}, {}); +} + +Node *fill_any_like_handler(Graph *graph, Node *node) { + auto *op = node->Op(); + auto value = BOOST_GET_CONST(float, op->GetAttr("value")); + auto x_shape = GetInputVarNode("X", node)->Var()->GetShape(); + auto dtype = BOOST_GET_CONST(int, op->GetAttr("dtype")); + auto x_dtype = static_cast(dtype); + size_t size = 1; + for (auto &dim : x_shape) { + size *= dim; + } + + Attribute out_value; + switch (x_dtype) { + case framework::proto::VarType::FP32: + out_value = std::vector(size, value); + break; + case framework::proto::VarType::FP64: + out_value = std::vector(size, value); + break; + case framework::proto::VarType::INT32: + out_value = std::vector(size, value); + break; + case 
framework::proto::VarType::INT64: + out_value = std::vector(size, value); + break; + case framework::proto::VarType::BOOL: + out_value = std::vector(size, value); + break; + default: + PADDLE_THROW( + platform::errors::Unimplemented("fill_any_like dtype: %d", x_dtype)); + } + return CreateConst(graph, node, node->inputs, node->outputs, + AttributeMap{ + {"value", out_value}, + {"dims", x_shape}, + {"dtype", VarType2OnnxDtype(dtype)}, + }); +} + +Node *one_hot_handler(Graph *graph, Node *node) { + auto *op = node->Op(); + auto depth = BOOST_GET_CONST(int, op->GetAttr("depth")); + auto allow_out_of_range = + BOOST_GET_CONST(bool, op->GetAttr("allow_out_of_range")); + if (allow_out_of_range) { + PADDLE_THROW(platform::errors::Unimplemented( + "Do not support allow_out_of_range=True")); + } else { + auto depth_tensor = CreateConst(graph, node, {}, {}, + {{"value", std::vector{depth}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::INT64}}); + auto value_tensor = + CreateConst(graph, node, {}, {}, {{"value", std::vector{0, 1}}, + {"dims", std::vector{2}}, + {"dtype", ONNXDataType::FLOAT}}); + return CreateBaseOp(graph, node, "popart_onehot", + {GetInputVarNode("X", node), depth_tensor->outputs[0], + value_tensor->outputs[0]}, + {GetOutputVarNode("Out", node)}, + {{"axis", int64_t{-1}}}); + } +} + +Node *split_handler(Graph *graph, Node *node) { + auto *op = node->Op(); + auto axis = BOOST_GET_CONST(int, op->GetAttr("axis")); + auto sections = BOOST_GET_CONST(std::vector, op->GetAttr("sections")); + return CreateBaseOp( + graph, node, "popart_split", {GetInputVarNode("X", node)}, node->outputs, + {{"num_outputs", int64_t(sections.size())}, + {"axis", int64_t(axis)}, + {"split", std::vector{sections.begin(), sections.end()}}}); +} + REGISTER_HANDLER(fill_constant, fill_constant_handler); REGISTER_HANDLER(gaussian_random, gaussian_random_handler); REGISTER_HANDLER(uniform_random, uniform_random_handler); REGISTER_HANDLER(transpose2, transpose_handler); REGISTER_HANDLER(reshape2, reshape_handler); +REGISTER_HANDLER(flatten2, flatten2_handler); REGISTER_HANDLER(gather, gather_handler); REGISTER_HANDLER(squeeze2, squeeze_handler); REGISTER_HANDLER(cast, cast_handler); @@ -388,6 +510,11 @@ REGISTER_HANDLER(stack, stack_handler); REGISTER_HANDLER(shape, shape_handler); REGISTER_HANDLER(slice, slice_handler); REGISTER_HANDLER(expand, expand_handler); +REGISTER_HANDLER(assign, assign_handler); +REGISTER_HANDLER(fill_any_like, fill_any_like_handler); +REGISTER_HANDLER(lookup_table_v2, lookup_table_v2_handler); +REGISTER_HANDLER(split, split_handler); +REGISTER_HANDLER(one_hot, one_hot_handler); } // namespace } // namespace ipu -- GitLab
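
For reference, the non-approximate branch of the new gelu_handler in activation_ops.cc above lowers GELU to the exact form GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2))), built from popart_div, popart_erf, popart_add, and popart_mul with the constant 1.4142135623730951. Below is a minimal standalone C++ sketch of that same decomposition for numerical reference only; it is not part of this patch, and the helper name gelu_exact is purely illustrative.

#include <cmath>
#include <cstdio>

// Exact GELU, mirroring the handler's constant 1.4142135623730951 (sqrt(2))
// and its div -> erf -> add -> mul -> mul chain of popart ops.
static double gelu_exact(double x) {
  const double sqrt2 = 1.4142135623730951;
  return 0.5 * x * (1.0 + std::erf(x / sqrt2));
}

int main() {
  // Spot-check a few points; values match the tanh-free "exact" GELU.
  for (double x : {-2.0, -0.5, 0.0, 0.5, 2.0}) {
    std::printf("gelu(%+.2f) = %+.6f\n", x, gelu_exact(x));
  }
  return 0;
}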