Unverified · Commit f1111f3c authored by Allen Guo, committed by GitHub

[IPU] support more ops 1/N (#44205)

* add authors
Co-authored-by: Allen Guo <alleng@graphcore.ai>
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: Zhaorui Chen <zhaoruic@graphcore.ai>

* squash cpp changes 2/N
Co-authored-by: Zhixin Yao <zhixiny@graphcore.ai>
Co-authored-by: Zhaorui Chen <zhaoruic@graphcore.ai>
Parent 5988553f
@@ -376,11 +376,473 @@ Node *dropout_handler(Graph *graph, Node *node) {
}
}
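
// Lowers Paddle's conv2d_transpose to popart_convtranspose: collects the
// kernel shape, strides, dilations, groups and output_padding, normalizes the
// paddings, and resolves the SAME/VALID padding_algorithm before emitting the op.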
Node *conv2d_transpose_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto data_format = BOOST_GET_CONST(std::string, op->GetAttr("data_format"));
if (data_format != "NCHW") {
PADDLE_THROW(
platform::errors::InvalidArgument("Only NCHW data_format is supported."));
}
auto *kernel_info = GetInputVarNode("Filter", node);
auto kernel_shape = kernel_info->Var()->GetShape();
auto dilations_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("dilations"));
auto dilations = std::vector<int64_t>{dilations_.begin(), dilations_.end()};
auto strides_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("strides"));
auto strides = std::vector<int64_t>{strides_.begin(), strides_.end()};
auto output_padding_ =
BOOST_GET_CONST(std::vector<int>, op->GetAttr("output_padding"));
auto output_padding =
std::vector<int64_t>{output_padding_.begin(), output_padding_.end()};
auto group_ = BOOST_GET_CONST(int, op->GetAttr("groups"));
auto group = int64_t(group_);
auto padding_algorithm =
BOOST_GET_CONST(std::string, op->GetAttr("padding_algorithm"));
auto paddings_ = BOOST_GET_CONST(std::vector<int>, op->GetAttr("paddings"));
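// Normalize paddings to ONNX order [h_begin, w_begin, h_end, w_end]; Paddle
// stores either [h, w] or (assumed) [h_begin, h_end, w_begin, w_end].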
if (paddings_.size() == 2) {
paddings_.push_back(paddings_[0]);
paddings_.push_back(paddings_[1]);
} else if (paddings_.size() == 4) {
std::swap(paddings_[1], paddings_[2]);
}
auto paddings = std::vector<int64_t>{paddings_.begin(), paddings_.end()};
if (padding_algorithm == "SAME") {
// Update paddings and dilations based on the sizes of H and W.
auto input_shape = GetInputVarNode("Input", node)->Var()->GetShape();
for (auto i = 0; i < 2; i++) {
auto out_size = (input_shape[i + 2] + strides[i] - 1) / strides[i];
auto pad_sum = std::max(
(out_size - 1) * strides[i] + kernel_shape[i] - input_shape[i + 2],
static_cast<int64_t>(0));
auto pad_0 = pad_sum / 2;
auto pad_1 = pad_sum - pad_0;
paddings[i] = pad_0;
paddings[i + 2] = pad_1;
}
for (auto i = 0; i < dilations.size(); i++) {
dilations[i] = 1;
}
} else if (padding_algorithm == "VALID") {
for (auto i = 0; i < paddings.size(); i++) {
paddings[i] = 0;
}
}
auto attrs = AttributeMap{{"dilations", dilations},
{"group", group},
{"kernel_shape", kernel_shape},
{"output_padding", output_padding},
{"pads", paddings},
{"strides", strides}};
if (!op->Input("Bias").empty()) {
return CreateBaseOp(graph,
node,
"popart_convtranspose",
{
GetInputVarNode("Input", node),
GetInputVarNode("Filter", node),
GetInputVarNode("Bias", node),
},
node->outputs,
attrs);
} else {
return CreateBaseOp(graph,
node,
"popart_convtranspose",
{
GetInputVarNode("Input", node),
GetInputVarNode("Filter", node),
},
node->outputs,
attrs);
}
}
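
// Lowers affine_channel to an elementwise multiply/add: 1-D Scale and Bias
// inputs are first reshaped to [1, -1, 1, 1] so they broadcast over the NCHW input.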
Node *affine_channel_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto data_layout = BOOST_GET_CONST(std::string, op->GetAttr("data_layout"));
if (data_layout != "NCHW") {
PADDLE_THROW(
platform::errors::InvalidArgument("Only NCHW data_layout is supported."));
}
auto *scale = GetInputVarNode("Scale", node);
auto *bias = GetInputVarNode("Bias", node);
auto scale_shape = scale->Var()->GetShape();
auto bias_shape = bias->Var()->GetShape();
if (scale_shape.size() <= 1 || bias_shape.size() <= 1) {
auto attrs = AttributeMap{{"value", std::vector<int64_t>{1, -1, 1, 1}},
{"dims", std::vector<int64_t>{4}},
{"dtype", ONNXDataType::INT64}};
auto new_shape_const = CreateConst(graph, node, {}, {}, attrs);
scale = CreateBaseOp(graph,
node,
"popart_reshape",
{scale, new_shape_const->outputs[0]},
{},
{})
->outputs[0];
bias = CreateBaseOp(graph,
node,
"popart_reshape",
{bias, new_shape_const->outputs[0]},
{},
{})
->outputs[0];
}
auto *out = CreateBaseOp(
graph, node, "popart_mul", {GetInputVarNode("X", node), scale}, {});
return CreateBaseOp(graph,
node,
"popart_add",
{out->outputs[0], bias},
{GetOutputVarNode("Out", node)});
}
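
// Shared handler for the *_interp_v2 ops: resolves the target 'size' or
// 'scale' from the OutSize / SizeTensor / Scale inputs or from attributes,
// then lowers to popart_resize with the matching coordinate transformation mode.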
Node *interp_handler(Graph *graph, Node *node, const std::string &mode) {
auto *op = node->Op();
auto data_layout = BOOST_GET_CONST(std::string, op->GetAttr("data_layout"));
if (data_layout != "NCHW") {
PADDLE_THROW(
platform::errors::InvalidArgument("Only NCHW data_layout is supported."));
}
auto align_corners = BOOST_GET_CONST(bool, op->GetAttr("align_corners"));
auto align_mode = BOOST_GET_CONST(int, op->GetAttr("align_mode"));
auto paddle_target_dtype = VarType::FP32;
auto onnx_target_dtype = ONNXDataType::FLOAT;
if (GetInputVarNode("X", node)->Var()->GetDataType() == VarType::FP16) {
paddle_target_dtype = VarType::FP16;
onnx_target_dtype = ONNXDataType::FLOAT16;
}
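// Map Paddle's align_corners / align_mode flags to an ONNX Resize
// coordinate_transformation_mode.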
std::string coordinate_transformation_mode = "half_pixel";
if (align_corners) {
coordinate_transformation_mode = "align_corners";
} else if (mode == "nearest") {
coordinate_transformation_mode = "asymmetric";
} else if (align_mode == 1 && mode == "cubic") {
coordinate_transformation_mode = "asymmetric";
}
bool has_out_size = node->Op()->Input("OutSize").size() > 0;
bool has_size_tensor = node->Op()->Input("SizeTensor").size() > 0;
bool has_scale_tensor = node->Op()->Input("Scale").size() > 0;
Node *size = nullptr;
Node *scale = nullptr;
// Input: Size and Scale
if (has_out_size) {
// Get 'size' from the tensor
size = GetInputVarNode("OutSize", node);
if (size->Var()->GetDataType() != VarType::INT64) {
size = CreateCast(graph,
node,
{GetInputVarNode("OutSize", node)},
{},
VarType::INT64)
->outputs[0];
}
} else if (has_size_tensor) {
// Get 'size' from multi-tensors
std::vector<Node *> size_nodes;
for (auto var_name : node->Op()->Input("SizeTensor")) {
Node *size_node = GetInputVarNodeByVarName(var_name, node);
if (size_node->Var()->GetDataType() != VarType::INT64) {
size_node = CreateCast(graph, node, {size_node}, {}, VarType::INT64)
->outputs[0];
}
size_nodes.push_back(size_node);
}
size = CreateBaseOp(graph,
node,
"popart_concat",
size_nodes,
{},
{{"axis", int64_t(0)}})
->outputs[0];
} else if (has_scale_tensor) {
// Get 'scale' from tensor
scale = GetInputVarNode("Scale", node);
if (scale->Var()->GetDataType() != paddle_target_dtype) {
scale =
CreateCast(graph, node, {scale}, {}, paddle_target_dtype)->outputs[0];
}
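// ONNX Resize expects one scale per input dimension; prepend 1.0 for the
// N and C axes ahead of the spatial scales.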
auto *padding = CreateConst(graph,
node,
{},
{},
{{"value", std::vector<float>{1.0, 1.0}},
{"dims", std::vector<int64_t>{2}},
{"dtype", onnx_target_dtype}})
->outputs[0];
scale = CreateBaseOp(graph,
node,
"popart_concat",
{padding, scale},
{},
{{"axis", int64_t(0)}})
->outputs[0];
} else {
// Get 'size' or 'scale' from attribute
auto out_d = BOOST_GET_CONST(int, op->GetAttr("out_d"));
auto out_h = BOOST_GET_CONST(int, op->GetAttr("out_h"));
auto out_w = BOOST_GET_CONST(int, op->GetAttr("out_w"));
if (out_d > 0 || out_w > 0 || out_h > 0) {
std::vector<int64_t> out_size;
if (GetInputVarNode("X", node)->Var()->GetShape().size() == 5) {
out_size.push_back(int64_t(out_d));
out_size.push_back(int64_t(out_h));
} else if (GetInputVarNode("X", node)->Var()->GetShape().size() == 4) {
out_size.push_back(int64_t(out_h));
}
out_size.push_back(int64_t(out_w));
size =
CreateConst(graph,
node,
{},
{},
{{"value", out_size},
{"dims", std::vector<int64_t>{int64_t(out_size.size())}},
{"dtype", ONNXDataType::INT64}})
->outputs[0];
} else {
auto scale_value =
BOOST_GET_CONST(std::vector<float>, op->GetAttr("scale"));
float padding = 1.0;
scale_value.insert(scale_value.begin(), padding);
scale_value.insert(scale_value.begin(), padding);
scale = CreateConst(
graph,
node,
{},
{},
{{"value", scale_value},
{"dims", std::vector<int64_t>{int64_t(scale_value.size())}},
{"dtype", onnx_target_dtype}})
->outputs[0];
}
}
Node *roi =
CreateConst(
graph,
node,
{},
{},
{{"value",
std::vector<float>(
GetInputVarNode("X", node)->Var()->GetShape().size() * 2, 1.0)},
{"dims",
std::vector<int64_t>{int64_t(
GetInputVarNode("X", node)->Var()->GetShape().size() * 2)}},
{"dtype", onnx_target_dtype}})
->outputs[0];
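// ONNX Resize sizes must cover every dimension, so prepend the input's N and
// C extents (sliced from its shape) to the requested spatial size.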
if (size != nullptr) {
Node *input_shape =
CreateBaseOp(
graph, node, "popart_shape", {GetInputVarNode("X", node)}, {})
->outputs[0];
Node *start = CreateConst(graph,
node,
std::vector<int>{0},
std::vector<int64_t>{1},
ONNXDataType::INT32)
->outputs[0];
Node *end = CreateConst(graph,
node,
std::vector<int>{2},
std::vector<int64_t>{1},
ONNXDataType::INT32)
->outputs[0];
Node *axes = CreateConst(graph,
node,
std::vector<int>{0},
std::vector<int64_t>{1},
ONNXDataType::INT32)
->outputs[0];
Node *nc = CreateBaseOp(graph,
node,
"popart_slice",
{input_shape, start, end, axes},
{},
{})
->outputs[0];
size = CreateBaseOp(graph,
node,
"popart_concat",
{nc, size},
{},
{{"axis", int64_t(0)}})
->outputs[0];
}
auto resize_attrs = AttributeMap{
{"coordinate_transformation_mode", coordinate_transformation_mode},
{"cubic_coeff_a", float{-0.75}},
{"exclude_outside", int64_t{0}},
{"extrapolation_value", float{0.0}},
{"mode", mode},
{"nearest_mode", std::string("round_prefer_floor")}};
if (mode == "nearest" && coordinate_transformation_mode == "asymmetric") {
resize_attrs.at("nearest_mode") = std::string("floor");
}
return CreateBaseOp(graph,
node,
"popart_resize",
{GetInputVarNode("X", node), roi, scale, size},
{GetOutputVarNode("Out", node)},
resize_attrs);
}
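
// The *_interp_v2 handlers below all delegate to interp_handler with the
// corresponding ONNX Resize mode.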
Node *bilinear_interp_v2_handler(Graph *graph, Node *node) {
return interp_handler(graph, node, "linear");
}
Node *nearest_interp_v2_handler(Graph *graph, Node *node) {
return interp_handler(graph, node, "nearest");
}
Node *bicubic_interp_v2_handler(Graph *graph, Node *node) {
return interp_handler(graph, node, "cubic");
}
Node *linear_interp_v2_handler(Graph *graph, Node *node) {
return interp_handler(graph, node, "linear");
}
Node *trilinear_interp_v2_handler(Graph *graph, Node *node) {
return interp_handler(graph, node, "linear");
}
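
// Lowers data_norm: mean = BatchSum / BatchSize and scale = sqrt(BatchSize /
// BatchSquareSum); Y = (X - mean) * scale, or (X - mean) * scale_w + bias when
// enable_scale_and_shift is set.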
Node *data_norm_handler(Graph *graph, Node *node) {
auto *op = node->Op();
int slot_dim = -1;
if (op->HasAttr("slot_dim")) {
slot_dim = BOOST_GET_CONST(int, op->GetAttr("slot_dim"));
}
if (slot_dim > 0) {
PADDLE_THROW(
platform::errors::InvalidArgument("slot_dim > 0 is not supported."));
}
bool enable_scale_and_shift = false;
if (op->HasAttr("enable_scale_and_shift")) {
enable_scale_and_shift =
BOOST_GET_CONST(bool, op->GetAttr("enable_scale_and_shift"));
}
auto *mean_arr = CreateBaseOp(graph,
node,
"popart_div",
{GetInputVarNode("BatchSum", node),
GetInputVarNode("BatchSize", node)},
{})
->outputs[0];
auto *scale_arr = CreateBaseOp(graph,
node,
"popart_div",
{GetInputVarNode("BatchSize", node),
GetInputVarNode("BatchSquareSum", node)},
{})
->outputs[0];
scale_arr =
CreateBaseOp(graph, node, "popart_sqrt", {scale_arr}, {})->outputs[0];
auto out =
CreateBaseOp(
graph, node, "popart_sub", {GetInputVarNode("X", node), mean_arr}, {})
->outputs[0];
if (enable_scale_and_shift) {
auto scale_res = CreateBaseOp(graph,
node,
"popart_mul",
{out, GetInputVarNode("scale_w", node)},
{})
->outputs[0];
return CreateBaseOp(graph,
node,
"popart_add",
{scale_res, GetInputVarNode("bias", node)},
{GetOutputVarNode("Y", node)});
} else {
return CreateBaseOp(graph,
node,
"popart_mul",
{out, scale_arr},
{GetOutputVarNode("Y", node)});
}
}
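
// Lowers pad3d (constant mode, NCDHW) to popart_pad; replicate/circular modes
// and a Paddings input tensor are rejected because PopART needs constant pads.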
Node *pad_handler(Graph *graph, Node *node) {
auto *op = node->Op();
auto mode = BOOST_GET_CONST(std::string, op->GetAttr("mode"));
auto value = BOOST_GET_CONST(float, op->GetAttr("value"));
auto data_format = BOOST_GET_CONST(std::string, op->GetAttr("data_format"));
if (data_format == "NDHWC") {
PADDLE_THROW(
platform::errors::Unimplemented("NDHWC format is not supported."));
}
if (mode == "replicate" || mode == "circular") {
PADDLE_THROW(platform::errors::Unimplemented(
"circular and replicate modes are not supported."));
}
if (op->Input("Paddings").size()) {
// Paddings -> input tensor
// The PopART Pad op only supports `pad` as a constant
PADDLE_THROW(platform::errors::Unimplemented(
"Paddings as an input tensor is not supported."));
}
// Paddings -> Attr
auto paddings = BOOST_GET_CONST(std::vector<int>, op->GetAttr("paddings"));
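// Reorder the six pad3d values (assumed [left, right, top, bottom, front,
// back]) into ONNX layout: begins for N, C, D, H, W followed by the ends.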
std::vector<int64_t> new_paddings(10, 0);
new_paddings[2] = paddings[4];
new_paddings[3] = paddings[2];
new_paddings[4] = paddings[0];
new_paddings[7] = paddings[5];
new_paddings[8] = paddings[3];
new_paddings[9] = paddings[1];
auto *paddings_node = CreateConst(graph,
node,
new_paddings,
std::vector<int64_t>{10},
ONNXDataType::INT64)
->outputs[0];
auto *value_node = CreateConst(graph,
node,
std::vector<float>{value},
std::vector<int64_t>{1},
ONNXDataType::FLOAT)
->outputs[0];
return CreateBaseOp(graph,
node,
"popart_pad",
{GetInputVarNode("X", node), paddings_node, value_node},
{GetOutputVarNode("Out", node)},
{{"mode", mode}});
}
} // namespace
} // namespace ipu
} // namespace platform
} // namespace paddle
REGISTER_HANDLER(affine_channel, affine_channel_handler);
REGISTER_HANDLER(pool2d, pool2d_handler);
REGISTER_HANDLER(max_pool2d_with_index, max_pool2d_with_index_handler);
REGISTER_HANDLER(batch_norm, batch_norm_handler);
@@ -388,4 +850,12 @@ REGISTER_HANDLER(group_norm, group_norm_handler);
REGISTER_HANDLER(instance_norm, instance_norm_handler);
REGISTER_HANDLER(layer_norm, layer_norm_handler);
REGISTER_HANDLER(conv2d, conv2d_handler);
REGISTER_HANDLER(conv2d_transpose, conv2d_transpose_handler);
REGISTER_HANDLER(dropout, dropout_handler);
REGISTER_HANDLER(bilinear_interp_v2, bilinear_interp_v2_handler);
REGISTER_HANDLER(nearest_interp_v2, nearest_interp_v2_handler);
REGISTER_HANDLER(bicubic_interp_v2, bicubic_interp_v2_handler);
REGISTER_HANDLER(linear_interp_v2, linear_interp_v2_handler);
REGISTER_HANDLER(trilinear_interp_v2, trilinear_interp_v2_handler);
REGISTER_HANDLER(data_norm, data_norm_handler);
REGISTER_HANDLER(pad3d, pad_handler);