// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/operators/conv_op.h"
#include <memory>
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/rknpu/bridges/graph.h"
#include "lite/kernels/rknpu/bridges/utility.h"

namespace paddle {
namespace lite {
namespace subgraph {
namespace rknpu {

int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[RKNPU] Converting " << op_type << "... ";

  // Get input and output vars and op attributes
  auto input_name = op_info->Input("Input").front();
  auto input = scope->FindMutableTensor(input_name);
  auto input_dims = input->dims();
  auto filter_name = op_info->Input("Filter").front();
  auto filter = scope->FindMutableTensor(filter_name);
  auto filter_dims = filter->dims();
  auto output_name = op_info->Output("Output").front();
  auto output = scope->FindMutableTensor(output_name);
  auto output_dims = output->dims();
  auto bs = input_dims[0];
  auto ic = input_dims[1];
  auto oc = filter_dims[0];
  CHECK_EQ(input_dims.size(), 4L);
  CHECK_EQ(output_dims.size(), 4L);
  CHECK_EQ(filter_dims.size(), 4L);
  CHECK_EQ(output_dims[0], bs);
  CHECK_EQ(output_dims[1], oc);
  auto strides = op_info->GetAttr<std::vector<int>>("strides");
  auto paddings = op_info->GetAttr<std::vector<int>>("paddings");
  auto groups = op_info->GetAttr<int>("groups");
  auto dilations = op_info->GetAttr<std::vector<int>>("dilations");
  auto fuse_relu = op_info->GetAttr<bool>("fuse_relu");
  CHECK_EQ(strides.size(), 2L);
  CHECK_EQ(dilations.size(), 2L);
  // Check depthwise mode
  bool is_depthwise_mode = (ic == groups && oc == groups && groups != 1);
  CHECK(op_info->HasInputScale(filter_name));
  auto weight_scale = op_info->GetInputScale(filter_name);

  // for quantization
  bool enable_int8 = false;
  float input_scale = 1.0;
  float output_scale = 1.0;
  int bit_length = 8;
  DataLayoutType layout = DATALAYOUT(kNCHW);
  PrecisionType precision = PRECISION(kFloat);
  if (op_info->HasAttr("enable_int8")) {
    enable_int8 = op_info->GetAttr<bool>("enable_int8");
    CHECK(op_info->HasInputScale(input_name));
    input_scale = op_info->GetInputScale(input_name)[0];
    bit_length = op_info->GetAttr<int>("bit_length");
    CHECK(op_info->HasOutputScale(output_name));
    output_scale = op_info->GetOutputScale(output_name)[0];
    if (enable_int8) {
      precision = PRECISION(kInt8);
    }
  }

  // Input node
  std::shared_ptr<Node> input_node = nullptr;
  if (graph->Has(input_name)) {
    input_node = graph->Get(input_name);
  } else {
    QuantizationInfo qnt;
    qnt.enable_int8 = enable_int8;
    if (enable_int8) {
      qnt.scale.clear();
      qnt.scale.push_back(input_scale);
      qnt.quant_bits = bit_length;
    }
    input_node =
        graph->Add(input_name, *input, input->precision(), layout, qnt);
  }

  if (paddings.size() == 2L) {
    for (size_t i = 0; i < strides.size(); ++i) {
      int copy_pad = *(paddings.begin() + 2 * i);
      paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
    }
  }
  CHECK_EQ(paddings.size(), 4L)
      << "[RKNPU] Paddings size should be the same or twice as the input "
         "size.";
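
  // Note: when "paddings" holds only two values (height pad, width pad), the
  // loop above duplicates each one so that the rest of this converter can
  // assume the four-value form (height pads followed by width pads) consumed
  // by UpdatePaddingAndDilation and by the rk::nn::Conv2DAttr pad fields.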
  std::string padding_algorithm("");
  if (op_info->HasAttr("padding_algorithm")) {
    padding_algorithm = op_info->GetAttr<std::string>("padding_algorithm");
  }
  operators::UpdatePaddingAndDilation(&paddings,
                                      &dilations,
                                      strides,
                                      padding_algorithm,
                                      input_dims,
                                      filter_dims);

  // Filter node
  std::shared_ptr<Node> filter_node = nullptr;
  QuantizationInfo filter_qnt;
  filter_qnt.enable_int8 = enable_int8;
  if (enable_int8) {
    filter_qnt.scale = weight_scale;
    filter_qnt.quant_bits = bit_length;
  }
  filter_node = graph->Add(
      filter_name, *filter, filter->precision(), layout, filter_qnt);

  // Add bias node if a bias input exists
  // Supports bias nodes with the following dimensions
  // 0: {oc}
  std::shared_ptr<Node> bias_node = nullptr;
  if (HasInputArg(op_info, scope, "Bias")) {
    auto bias_name = op_info->Input("Bias").front();
    if (graph->Has(bias_name)) {
      bias_node = graph->Get(bias_name);
    } else {
      auto bias = scope->FindMutableTensor(bias_name);
      auto bias_dims = bias->dims();
      auto bias_data_size = bias_dims.production();
      auto output_data_size = output_dims.production();
      std::vector<int64_t> bias_shape;
      if (bias_data_size == oc) {
        // 0: {oc}
        bias_shape = {oc};
      } else {
        LOG(WARNING) << "[RKNPU] Bias dimension " << bias_dims
                     << " isn't supported in conv2d Op when output dimension "
                     << "is " << output_dims;
        return FAILED;
      }
      if (enable_int8) {
        // Requantize the float bias to int32 with the per-channel scale
        // input_scale * weight_scale[i].
        auto bias_name_qnt = bias_name + "/qnt";
        auto* bias_qnt = scope->NewTensor(bias_name_qnt);
        bias_qnt->Resize(bias_shape);
        bias_qnt->set_persistable(true);
        bias_qnt->set_precision(PrecisionType::kInt32);
        auto* bias_qnt_data = bias_qnt->mutable_data<int32_t>();
        auto* bias_data = bias->mutable_data<float>();
        QuantizationInfo qnt;
        qnt.enable_int8 = enable_int8;
        qnt.quant_bits = 32;
        qnt.scale.resize(weight_scale.size());
        for (size_t i = 0; i < weight_scale.size(); i++) {
          qnt.scale[i] = input_scale * weight_scale[i];
        }
        // Use a 64-bit shift so the range bound stays defined when
        // quant_bits == 32.
        auto dtype_max = static_cast<int>((1LL << (qnt.quant_bits - 1)) - 1);
        auto dtype_min = static_cast<int>(0 - dtype_max);
        for (int i = 0; i < oc; i++) {
          bias_qnt_data[i] =
              std::min(std::max(static_cast<int>(bias_data[i] / qnt.scale[i]),
                                dtype_min),
                       dtype_max);
        }
        bias_node = graph->Add(
            bias_name, *bias_qnt, bias_qnt->precision(), layout, qnt);
      } else {
        bias_node = graph->Add(bias_name, *bias, bias_shape);
      }
    }
  } else {
    // No bias input: create an all-zero dummy bias tensor.
    auto bias_name = filter_name + "/bias/dummy";
    auto* bias = scope->NewTensor(bias_name);
    std::vector<int64_t> bias_shape = {oc};
    bias->Resize(bias_shape);
    bias->set_persistable(true);
    if (enable_int8) {
      bias->set_precision(PrecisionType::kInt32);
      auto* bias_data = bias->mutable_data<int32_t>();
      for (int i = 0; i < oc; i++) {
        bias_data[i] = 0;
      }
      QuantizationInfo qnt;
      qnt.enable_int8 = enable_int8;
      qnt.quant_bits = 32;
      qnt.scale.resize(weight_scale.size());
      for (size_t i = 0; i < weight_scale.size(); i++) {
        qnt.scale[i] = input_scale * weight_scale[i];
      }
      bias_node =
          graph->Add(bias_name, *bias, bias->precision(), layout, qnt);
    } else {
      bias->set_precision(PrecisionType::kFloat);
      auto* bias_data = bias->mutable_data<float>();
      for (int i = 0; i < oc; i++) {
        bias_data[i] = 0.0;
      }
      bias_node = graph->Add(bias_name, *bias, bias_shape);
    }
  }

  // Conv node
  std::shared_ptr<Node> conv_node = nullptr;
  std::shared_ptr<Node> output_node = nullptr;
  std::vector<std::shared_ptr<rk::nn::Tensor>> inputs;
  std::vector<std::shared_ptr<rk::nn::Tensor>> outputs;
  QuantizationInfo output_qnt;
  output_qnt.enable_int8 = enable_int8;
  if (enable_int8) {
    output_qnt.quant_bits = bit_length;
    output_qnt.scale.push_back(output_scale);
    output->mutable_data<int8_t>();
  }
  output_node =
      graph->Add(output_name, *output, precision, layout, output_qnt);

  inputs.push_back(input_node->data());
  inputs.push_back(filter_node->data());
  inputs.push_back(bias_node->data());
  outputs.push_back(output_node->data());

  rk::nn::Conv2DAttr attr;
  attr.ksize[0] = filter_dims[2];
  attr.ksize[1] = filter_dims[3];
  attr.stride[0] = strides[0];
  attr.stride[1] = strides[1];
  attr.pad[0] = paddings[0];
  attr.pad[1] = paddings[1];
  attr.pad[2] = paddings[2];
  attr.pad[3] = paddings[3];
  attr.group = groups;
  attr.weights = oc;
  attr.dilation[0] = dilations[0];
  attr.dilation[1] = dilations[1];
  attr.pad_type = rk::nn::PadType::AUTO;
  attr.has_relu = fuse_relu;
  if (is_depthwise_mode) {
    attr.multiplier = 1;
  } else {
    attr.multiplier = 0;
  }

  auto rGraph = graph->GetHandle();
  auto conv = rGraph->AddOperator(
      rk::nn::OperatorType::CONV2D, inputs, outputs, &attr, output_name);

  return REBUILD_WHEN_SHAPE_CHANGED;
}

}  // namespace rknpu
}  // namespace subgraph
}  // namespace lite
}  // namespace paddle

REGISTER_SUBGRAPH_BRIDGE(conv2d,
                         kRKNPU,
                         paddle::lite::subgraph::rknpu::ConvConverter);
REGISTER_SUBGRAPH_BRIDGE(depthwise_conv2d,
                         kRKNPU,
                         paddle::lite::subgraph::rknpu::ConvConverter);