// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/core/subgraph_bridge_registry.h"
#include "lite/kernels/apu/bridges/graph.h"
#include "lite/kernels/apu/bridges/utility.h"

namespace paddle {
namespace lite {
namespace subgraph {
namespace apu {

int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto model = graph->model();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  int neuron_errCode;
  VLOG(3) << "[APU] Converting [" + op_type + "]";

  // Get input and output vars and op attributes
  auto x_name = op_info->Input("X").front();
  auto x = scope->FindTensor(x_name);
  auto x_dims = x->dims();
  auto y_name = op_info->Input("Y").front();
  auto y = scope->FindTensor(y_name);
  auto y_dims = y->dims();
  auto out_name = op_info->Output("Out").front();
  auto out = scope->FindTensor(out_name);
  auto out_dims = out->dims();
  auto axis = op_info->GetAttr<int>("axis");
  if (axis < 0) {
    axis = x_dims.size() - y_dims.size();
  }

  auto x_shape = x_dims.Vectorize();
  auto y_shape = y_dims.Vectorize();
  // Two dimensions are compatible when:
  // 1. they are equal, or
  // 2. one of them is 1
  for (int i = axis; i < x_shape.size(); i++) {
    if (x_dims[i] != y_dims[i - axis]) {
      // Input 1 (Y) must have dimensions compatible with input 0 (X)
      if (y_dims[i - axis] != 1) {
        LOG(WARNING) << "[APU] Incompatible broadcast dims: " << i << ":"
                     << axis << ":" << y_dims[i - axis];
        return FAILED;
      }
    }
  }  // End of for

  int32_t fuse_val[1] = {NEURON_FUSED_NONE};
  // Fused activation
  if (op_type == "fusion_elementwise_add_activation" ||
      op_type == "fusion_elementwise_sub_activation" ||
      op_type == "fusion_elementwise_mul_activation" ||
      op_type == "fusion_elementwise_div_activation") {
    auto act_type = op_info->GetAttr<std::string>("act_type");
    if (act_type == "relu") {
      fuse_val[0] = NEURON_FUSED_RELU;
    } else if (act_type == "relu1") {
      fuse_val[0] = NEURON_FUSED_RELU1;
    } else if (act_type == "relu6") {
      fuse_val[0] = NEURON_FUSED_RELU6;
    } else if (!act_type.empty()) {
      fuse_val[0] = NEURON_FUSED_NONE;
      LOG(WARNING) << "[APU] Unsupported act_type: " << act_type;
      return FAILED;
    }
  }  // End of if

  VLOG(3) << "x_name: " << x_name;
  CHECK(op_info->HasInputScale(x_name));
  auto x_scale = op_info->GetInputScale(x_name)[0];
  CHECK(op_info->HasInputScale(y_name));
  auto y_scale = op_info->GetInputScale(y_name)[0];
  CHECK(op_info->HasOutputScale(out_name));
  auto out_scale = op_info->GetOutputScale(out_name)[0];

  // Add x tensor type
  NeuronOperandType xType;
  xType.type = NEURON_TENSOR_QUANT8_ASYMM;
  xType.scale = x_scale;
  xType.zeroPoint = 128;
  xType.dimensionCount = x_dims.size();
  std::vector<uint32_t> dims_x = {(uint32_t)x_dims[0],
                                  (uint32_t)x_dims[2],
                                  (uint32_t)x_dims[3],
                                  (uint32_t)x_dims[1]};
  xType.dimensions = &dims_x[0];
  std::shared_ptr<Node> x_node = nullptr;
  if (graph->Has(x_name)) {
    VLOG(3) << "Graph has " << x_name;
    if (graph->IsInput(x_name)) {
      VLOG(3) << x_name << " is an input node and already exists";
      x_name = "transpose_" + x_name;
    }
    if (graph->IsOutput(x_name)) {
      VLOG(3) << x_name << " is both an input and output node";
      x_name = "transpose_" + x_name;
    }
    x_node = graph->Get(x_name);
  } else {
    if (graph->IsInput(x_name)) {
      // Insert a transpose node for NCHW -> NHWC data relayout
      insert_transpose_node(ctx,
                            x_name,
                            "transpose_" + x_name,
                            {(uint32_t)x_dims[0],
                             (uint32_t)x_dims[1],
                             (uint32_t)x_dims[2],
                             (uint32_t)x_dims[3]},
                            dims_x,
                            {0, 2, 3, 1},
                            xType.scale,
                            xType.zeroPoint);
      // Change x name after inserting the transpose op
      x_name = "transpose_" + x_name;
      x_node = graph->Get(x_name);
    } else {
      NeuronModel_addOperand(model, &xType);  // Operand 0: x
      x_node = graph->Add(x_name, dims_x);
    }
  }  // End of else
  VLOG(3) << "x node idx: " << x_node->index() << ", x_dims: " << x_dims
          << ", x_scale: " << x_scale << ", xType: " << xType.dimensions[0]
          << ":" << xType.dimensions[1] << ":" << xType.dimensions[2] << ":"
          << xType.dimensions[3];

  // Add y tensor type
  NeuronOperandType yType;
  yType.type = NEURON_TENSOR_QUANT8_ASYMM;
  yType.scale = y_scale;
  yType.zeroPoint = 128;
  yType.dimensionCount = y_dims.size();
  std::vector<uint32_t> dims_y = {(uint32_t)y_dims[0],
                                  (uint32_t)y_dims[2],
                                  (uint32_t)y_dims[3],
                                  (uint32_t)y_dims[1]};
  yType.dimensions = &dims_y[0];
  std::shared_ptr<Node> y_node = nullptr;
  if (graph->Has(y_name)) {
    VLOG(3) << "Graph has " << y_name;
    y_node = graph->Get(y_name);
  } else {
    if (graph->IsInput(y_name)) {
      // Insert a transpose node for NCHW -> NHWC data relayout
      insert_transpose_node(ctx,
                            y_name,
                            "transpose_" + y_name,
                            {(uint32_t)y_dims[0],
                             (uint32_t)y_dims[1],
                             (uint32_t)y_dims[2],
                             (uint32_t)y_dims[3]},
                            dims_y,
                            {0, 2, 3, 1},
                            yType.scale,
                            yType.zeroPoint);
      y_name = "transpose_" + y_name;
      y_node = graph->Get(y_name);
    } else {
      NeuronModel_addOperand(model, &yType);  // Operand 1: y
      y_node = graph->Add(y_name, dims_y);
    }
  }
  VLOG(3) << "y node idx: " << y_node->index() << ", y_dims: " << y_dims
          << ", y_scale: " << y_scale << ", yType: " << yType.dimensions[0]
          << ":" << yType.dimensions[1] << ":" << yType.dimensions[2] << ":"
          << yType.dimensions[3];

  // Add fuse operand type
  NeuronOperandType int32Type;
  int32Type.type = NEURON_INT32;
  int32Type.dimensionCount = 0;
  std::vector<uint32_t> dims_int32 = {1};

  // Add fuse operand
  std::shared_ptr<Node> fuse_node = nullptr;
  NeuronModel_addOperand(model, &int32Type);  // Operand 2: fuse
  fuse_node = graph->Add(out_name + "_fuse", dims_int32);

  // Add out tensor type
  NeuronOperandType outType;
  outType.type = NEURON_TENSOR_QUANT8_ASYMM;
  outType.scale = out_scale;
  outType.zeroPoint = 128;
  outType.dimensionCount = out_dims.size();
  std::vector<uint32_t> dims_out = {(uint32_t)out_dims[0],
                                    (uint32_t)out_dims[2],
                                    (uint32_t)out_dims[3],
                                    (uint32_t)out_dims[1]};
  outType.dimensions = &dims_out[0];
  std::shared_ptr<Node> out_node = nullptr;
  if (graph->Has(out_name)) {
    VLOG(3) << "Graph has " << out_name;
    out_node = graph->Get(out_name);
  } else {
    if (graph->IsOutput(out_name)) {
      NeuronModel_addOperand(model, &outType);  // Operand 3: output
      out_node = graph->Add("transpose_" + out_name, dims_out);
    } else {
      NeuronModel_addOperand(model, &outType);  // Operand 3: output
      out_node = graph->Add(out_name, dims_out);
    }
  }
  VLOG(3) << "out node idx: " << out_node->index() << ", out_dims: "
          << out_dims << ", out_scale: " << out_scale << ", outType: "
          << outType.dimensions[0] << ":" << outType.dimensions[1] << ":"
          << outType.dimensions[2] << ":" << outType.dimensions[3];

  // Set fuse value
  NeuronModel_setOperandValue(
      model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1);

  std::vector<uint32_t> addInIndex = {
      x_node->index(),      // 0: A tensor
      y_node->index(),      // 1: A tensor of the same OperandCode,
                            //    and compatible dimensions as input 0
      fuse_node->index()};  // 2: fuse
  std::vector<uint32_t> addOutIndex = {out_node->index()};
  if (op_type == "elementwise_add" ||
      op_type == "fusion_elementwise_add_activation") {
    neuron_errCode = NeuronModel_addOperation(model,
                                              NEURON_ADD,
                                              addInIndex.size(),
                                              &addInIndex[0],
                                              addOutIndex.size(),
                                              &addOutIndex[0]);
  } else {
    LOG(WARNING) << "[APU] Unsupported op type: " << op_type;
    return FAILED;
  }

  if (NEURON_NO_ERROR != neuron_errCode) {
    LOG(WARNING) << "[APU] Failed to add operation: " << op_type;
    return FAILED;
  }

  if (graph->IsOutput(out_name)) {
    // Insert a transpose node for NHWC -> NCHW data relayout
    insert_transpose_node(ctx,
                          "transpose_" + out_name,
                          out_name,
                          dims_out,
                          {(uint32_t)out_dims[0],
                           (uint32_t)out_dims[1],
                           (uint32_t)out_dims[2],
                           (uint32_t)out_dims[3]},
                          {0, 3, 1, 2},
                          outType.scale,
                          outType.zeroPoint);
    out_node = graph->Get(out_name);
    if (out_node == nullptr) return FAILED;
  }

  return REBUILD_WHEN_SHAPE_CHANGED;
}

}  // namespace apu
}  // namespace subgraph
}  // namespace lite
}  // namespace paddle

REGISTER_SUBGRAPH_BRIDGE(elementwise_add,
                         kAPU,
                         paddle::lite::subgraph::apu::ElementwiseConverter);
REGISTER_SUBGRAPH_BRIDGE(elementwise_mul,
                         kAPU,
                         paddle::lite::subgraph::apu::ElementwiseConverter);
REGISTER_SUBGRAPH_BRIDGE(fusion_elementwise_add_activation,
                         kAPU,
                         paddle::lite::subgraph::apu::ElementwiseConverter);