// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/apu/bridges/graph.h"
#include "lite/kernels/apu/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"

namespace paddle {
namespace lite {
namespace subgraph {
namespace apu {

int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto model = graph->model();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto scope = op->scope();
  VLOG(3) << "[APU] Converting [" + op_type + "]";

  auto libHandle = graph->libHandle();
  LOAD_FUNCTIONS(libHandle, NeuronModel_addOperand, neuron_model_addOperand)
  LOAD_FUNCTIONS(
      libHandle, NeuronModel_setOperandValue, neuron_model_setOperandValue)
  LOAD_FUNCTIONS(libHandle, NeuronModel_addOperation, neuron_model_addOperation)

  auto input_name = op_info->Input("Input").front();
  auto input = scope->FindMutableTensor(input_name);
  auto input_dims = input->dims();
  CHECK_GE(input_dims.size(), 2UL);

  auto w_name = op_info->Input("W").front();
  auto w = scope->FindMutableTensor(w_name);
  auto w_dims = w->dims();
  CHECK_EQ(w_dims.size(), 2UL);

  auto out_name = op_info->Output("Out").front();
  auto out = scope->FindMutableTensor(out_name);
  auto out_dims = out->dims();

  // Flatten the input to an [m, k] matrix: m collapses the first
  // in_num_col_dims dimensions, k collapses the rest.
  int in_num_col_dims = op_info->GetAttr<int>("in_num_col_dims");
  int m = input_dims.Slice(0, in_num_col_dims).production();
  int k = input_dims.Slice(in_num_col_dims, input_dims.size()).production();
  int n = w_dims[1];
  CHECK_EQ(k * n, w_dims.production());
  VLOG(3) << "[APU] input dims: " << input_dims << " w dims: " << w_dims
          << " out_dims: " << out_dims << " m: " << m << " k: " << k
          << " n: " << n;

  // This bridge only supports fully quantized (int8) models; bail out if the
  // quantization attributes are missing.
  float input_scale = 1.0f;
  float out_scale = 1.0f;
  std::vector<float> w_scale;
  if (op_info->HasAttr("enable_int8")) {
    if (op_info->GetAttr<bool>("enable_int8")) {
      if (op_info->HasAttr("input_scale"))
        input_scale = op_info->GetAttr<float>("input_scale");
      if (op_info->HasAttr("weight_scale"))
        w_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
      if (op_info->HasAttr("output_scale"))
        out_scale = op_info->GetAttr<float>("output_scale");
    } else {
      return FAILED;
    }
  } else {
    return FAILED;
  }

  // Add input tensor type. Dimensions are reordered NCHW -> NHWC
  // (0, 2, 3, 1) to match the layout used by the other APU bridges.
  NeuronOperandType inType;
  inType.type = NEURON_TENSOR_QUANT8_ASYMM;
  inType.scale = input_scale;
  inType.zeroPoint = 128;
  inType.dimensionCount = input_dims.size();
  std::vector<uint32_t> dims_in = {(uint32_t)input_dims[0],
                                   (uint32_t)input_dims[2],
                                   (uint32_t)input_dims[3],
                                   (uint32_t)input_dims[1]};
  inType.dimensions = &dims_in[0];

  std::shared_ptr<Node> in_node = nullptr;
  if (graph->Has(input_name)) {
    // Input operand already exists
    in_node = graph->Get(input_name);
    VLOG(3) << "Graph has " << input_name << ",index: " << in_node->index();
  } else {
    // Add input operand
    (*neuron_model_addOperand)(model, &inType);  // 0: input
    in_node = graph->Add(input_name, dims_in);
  }
  VLOG(3) << "input_scale: " << input_scale
          << ", inType: " << inType.dimensions[0] << " : "
          << inType.dimensions[1] << " : " << inType.dimensions[2] << " : "
          << inType.dimensions[3];
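
  // A worked example of the NEURON_TENSOR_QUANT8_ASYMM encoding used for the
  // input above (and for the weight and output below). The Neuron adapter is
  // assumed to follow the NNAPI convention:
  //
  //   real_value = scale * (quantized_value - zeroPoint)
  //
  // With zeroPoint = 128 the uint8 range [0, 255] covers the roughly
  // symmetric real range [-128 * scale, 127 * scale]; e.g. with scale = 0.5,
  // the quantized byte 130 decodes to 0.5 * (130 - 128) = 1.0.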
  // Add weight tensor type
  NeuronOperandType wType;
  wType.type = NEURON_TENSOR_QUANT8_ASYMM;
  wType.scale = w_scale[0];
  wType.zeroPoint = 128;
  wType.dimensionCount = w_dims.size();
  std::vector<uint32_t> dims_w = {(uint32_t)w_dims[1], (uint32_t)w_dims[0]};
  wType.dimensions = &dims_w[0];
  (*neuron_model_addOperand)(model, &wType);  // 1: weight
  std::shared_ptr<Node> w_node = graph->Add(w_name, dims_w);
  VLOG(3) << "w_scale size: " << w_scale.size() << ",w_scale: " << w_scale[0]
          << ", wType dimensions: " << wType.dimensions[0] << " : "
          << wType.dimensions[1] << ", memory size: " << w->memory_size();

  // Add bias type. The bias of a quantized fully connected op is int32 with
  // scale = input_scale * weight_scale and zeroPoint = 0, mirroring the
  // NNAPI convention.
  NeuronOperandType biasType;
  biasType.type = NEURON_TENSOR_INT32;
  biasType.zeroPoint = 0;
  biasType.scale = input_scale * w_scale[0];
  std::shared_ptr<Node> bias_node = nullptr;
  if (HasInputArg(op_info, scope, "Bias")) {
    auto bias_name = op_info->Input("Bias").front();
    auto bias_type = kernel->GetInputDeclType("Bias");
    auto bias = scope->FindMutableTensor(bias_name);
    auto bias_dims = bias->dims();

    biasType.dimensionCount = bias_dims.size();
    std::vector<uint32_t> dims_bias = {(uint32_t)bias_dims[0]};
    biasType.dimensions = &dims_bias[0];
    (*neuron_model_addOperand)(model, &biasType);  // 2: bias
    bias_node = graph->Add(bias_name, dims_bias);
    VLOG(3) << "Bias name: " << bias_name << ", bias dims: " << bias_dims
            << ", bias scale: " << biasType.scale
            << " ,memory size: " << bias->memory_size();
  } else {
    // No bias input: create a zero-filled bias operand of length n
    biasType.dimensionCount = 1;
    std::vector<uint32_t> dims_bias = {(uint32_t)n};
    biasType.dimensions = &dims_bias[0];
    (*neuron_model_addOperand)(model, &biasType);  // 2: bias
    bias_node = graph->Add(w_name + "_default_bias", dims_bias);
  }

  // Add fuse type (scalar int32 selecting the fused activation)
  NeuronOperandType fuseType;
  fuseType.type = NEURON_INT32;
  fuseType.dimensionCount = 0;
  std::vector<uint32_t> dims_int32 = {0};
  (*neuron_model_addOperand)(model, &fuseType);  // 3: fuse
  std::shared_ptr<Node> fuse_node = graph->Add(w_name + "_fuse", dims_int32);

  // Add output tensor type
  NeuronOperandType outType;
  outType.type = NEURON_TENSOR_QUANT8_ASYMM;
  outType.scale = out_scale;
  outType.zeroPoint = 128;
  outType.dimensionCount = 2;
  std::vector<uint32_t> dims_out = {(uint32_t)out_dims[0],
                                    (uint32_t)out_dims[1]};
  outType.dimensions = &dims_out[0];
  VLOG(3) << "out_scale: " << out_scale
          << ", outType: " << outType.dimensions[0] << " : "
          << outType.dimensions[1];
  (*neuron_model_addOperand)(model, &outType);  // output
  std::shared_ptr<Node> out_node = graph->Add(out_name, dims_out);

  // NEURON_FULLY_CONNECTED expects weights shaped [num_units, input_size],
  // i.e. [n, k], while Paddle stores W as [k, n]; transpose the filter and
  // write it back in place. transposeAsym (from utility.h) is assumed to
  // also shift the int8 values by +128 into uint8 while transposing,
  // matching the zeroPoint = 128 declared above.
  int8_t* w_data = w->mutable_data<int8_t>();
  Tensor transpose_filter;
  // Original dimension
  transpose_filter.Resize({(uint32_t)w_dims[1], (uint32_t)w_dims[0]});
  transpose_filter.mutable_data<uint8_t>();
  transposeAsym(w->data<int8_t>(),
                transpose_filter.mutable_data<uint8_t>(),
                {1, 1, (uint32_t)w_dims[0], (uint32_t)w_dims[1]},
                {0, 1, 3, 2});
  memcpy(w->mutable_data<int8_t>(),
         transpose_filter.mutable_data<uint8_t>(),
         w->memory_size());

  int neuron_errCode = (*neuron_model_setOperandValue)(
      model, w_node->index(), w->raw_data(), w->memory_size());
  if (NEURON_NO_ERROR != neuron_errCode) {
    LOG(WARNING) << "Set W operand value fail:" << neuron_errCode
                 << ",index: " << w_node->index();
    return FAILED;
  }
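
  // Worked example of the bias encoding that float2int32() (from
  // lite/kernels/apu/bridges/utility.h) is assumed to compute below: a float
  // bias value b for output channel i is stored as
  //
  //   round(b / (input_scale * w_scale[i]))
  //
  // e.g. with input_scale = 0.1 and w_scale[i] = 0.05, b = 1.25 becomes
  // round(1.25 / 0.005) = 250. Matching biasType.scale to
  // input_scale * w_scale[0] above keeps the bias in the same fixed-point
  // domain as the int32 accumulator.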
  // Add bias if bias tensor exists
  if (HasInputArg(op_info, scope, "Bias")) {
    auto bias_name = op_info->Input("Bias").front();
    auto bias = scope->FindMutableTensor(bias_name);
    // Quantize the float bias to int32 in place, reusing the float buffer
    int32_t* int32_bias_data =
        reinterpret_cast<int32_t*>(bias->mutable_data<float>());
    float2int32(bias->data<float>(), input_scale, w_scale, int32_bias_data);

    VLOG(3) << int32_bias_data[0] << ":" << int32_bias_data[1] << ":"
            << int32_bias_data[2] << ":" << int32_bias_data[3];
    neuron_errCode =
        (*neuron_model_setOperandValue)(model,
                                        bias_node->index(),
                                        bias->raw_data(),
                                        bias->memory_size());  // 2: bias
  } else {
    auto int32_bias = std::make_shared<Tensor>();
    int32_bias->Resize({1, out_dims[1]});
    memset(int32_bias->mutable_data<int32_t>(), 0, int32_bias->memory_size());
    VLOG(3) << "default: " << int32_bias->memory_size();
    neuron_errCode =
        (*neuron_model_setOperandValue)(model,
                                        bias_node->index(),
                                        int32_bias->raw_data(),
                                        int32_bias->memory_size());  // 2: bias
    // Keep the zero-filled bias alive for the lifetime of the model
    bias_node->set_data(int32_bias);
  }

  // Add fuse value (0: no fused activation)
  int32_t fuse_val[1] = {0};
  (*neuron_model_setOperandValue)(
      model, fuse_node->index(), fuse_val, sizeof(int32_t) * 1);  // 3: fuse

  std::vector<uint32_t> addInIndex = {in_node->index(),     // 0: input
                                      w_node->index(),      // 1: weight
                                      bias_node->index(),   // 2: bias
                                      fuse_node->index()};  // 3: fuse
  std::vector<uint32_t> addOutIndex = {out_node->index()};
  neuron_errCode = (*neuron_model_addOperation)(model,
                                                NEURON_FULLY_CONNECTED,
                                                addInIndex.size(),
                                                &addInIndex[0],
                                                addOutIndex.size(),
                                                &addOutIndex[0]);
  if (NEURON_NO_ERROR != neuron_errCode) {
    LOG(WARNING) << "Add op fail:" << op_type;
    return FAILED;
  }

  return REBUILD_WHEN_SHAPE_CHANGED;
}

}  // namespace apu
}  // namespace subgraph
}  // namespace lite
}  // namespace paddle

REGISTER_SUBGRAPH_BRIDGE(fc, kAPU, paddle::lite::subgraph::apu::FCConverter);
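
// For reference, the operand layout assumed for NEURON_FULLY_CONNECTED
// (mirroring ANEURALNETWORKS_FULLY_CONNECTED from NNAPI):
//   inputs:  0: input  [batch_size, input_size]  TENSOR_QUANT8_ASYMM
//            1: weight [num_units, input_size]   TENSOR_QUANT8_ASYMM
//            2: bias   [num_units]               TENSOR_INT32
//            3: fuse   scalar INT32 (0 = no fused activation)
//   outputs: 0: output [batch_size, num_units]   TENSOR_QUANT8_ASYMM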