// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/api/light_api.h"
#include <algorithm>
#include "paddle_use_kernels.h"  // NOLINT
#include "paddle_use_ops.h"      // NOLINT

namespace paddle {
namespace lite {

void LightPredictor::Build(const std::string& lite_model_file,
                           bool model_from_memory) {
  if (model_from_memory) {
    LoadModelNaiveFromMemory(lite_model_file, scope_.get(), &cpp_program_desc_);
  } else {
    LoadModelNaiveFromFile(lite_model_file, scope_.get(), &cpp_program_desc_);
  }

  // For post-training weight quantization, load the int8/16 weights of the
  // optimized model and dequantize them to fp32.
  DequantizeWeight();
  BuildRuntimeProgram(cpp_program_desc_);
  PrepareFeedFetch();
}

void LightPredictor::Build(const std::string& model_dir,
                           const std::string& model_buffer,
                           const std::string& param_buffer,
                           lite_api::LiteModelType model_type,
                           bool model_from_memory) {
  switch (model_type) {
#ifndef LITE_ON_TINY_PUBLISH
    case lite_api::LiteModelType::kProtobuf:
      LoadModelPb(model_dir, "", "", scope_.get(), &cpp_program_desc_);
      break;
#endif
    case lite_api::LiteModelType::kNaiveBuffer: {
      if (model_from_memory) {
        LoadModelNaiveFromMemory(
            model_buffer, param_buffer, scope_.get(), &cpp_program_desc_);
      } else {
        LoadModelNaive(model_dir, scope_.get(), &cpp_program_desc_);
      }
      break;
    }
    default:
      LOG(FATAL) << "Unknown model type";
  }

  DequantizeWeight();
  BuildRuntimeProgram(cpp_program_desc_);
  PrepareFeedFetch();
}

Tensor* LightPredictor::GetInput(size_t offset) {
  CHECK(input_names_.size() > offset)
      << "The network has " << input_names_.size() << " inputs"
      << ", the offset should be less than this.";
  auto* in_var = program_->exec_scope()->FindVar(input_names_[offset]);
  CHECK(in_var) << "no input variable " << input_names_[offset]
                << " in exec_scope";
  return in_var->GetMutable<lite::Tensor>();
}

// Get an input tensor by name.
Tensor* LightPredictor::GetInputByName(const std::string& name) {
  auto element = std::find(input_names_.begin(), input_names_.end(), name);
  if (element == input_names_.end()) {
    LOG(ERROR) << "Model does not have an input named [" << name
               << "]; the model's inputs are:";
    for (size_t i = 0; i < input_names_.size(); i++) {
      LOG(ERROR) << "[" << input_names_[i] << "]";
    }
    return nullptr;
  } else {
    int position = std::distance(input_names_.begin(), element);
    return GetInput(position);
  }
}

const Tensor* LightPredictor::GetOutput(size_t offset) {
  CHECK(output_names_.size() > offset)
      << "The network has " << output_names_.size() << " outputs"
      << ", the offset should be less than this.";
  auto* out_var = program_->exec_scope()->FindVar(output_names_.at(offset));
  CHECK(out_var) << "no output variable " << output_names_.at(offset)
                 << " in exec_scope";
  return out_var->GetMutable<lite::Tensor>();
}

// Get the names of all inputs.
std::vector<std::string> LightPredictor::GetInputNames() {
  return input_names_;
}

// Get the names of all outputs.
std::vector<std::string> LightPredictor::GetOutputNames() {
  return output_names_;
}

// Append the names of inputs and outputs into input_names_ and output_names_.
void LightPredictor::PrepareFeedFetch() {
  auto current_block = cpp_program_desc_.GetBlock<cpp::BlockDesc>(0);
  std::vector<cpp::OpDesc*> feeds;
  std::vector<cpp::OpDesc*> fetchs;
  for (size_t i = 0; i < current_block->OpsSize(); i++) {
    auto op = current_block->GetOp<cpp::OpDesc>(i);
    if (op->Type() == "feed") {
      feeds.push_back(op);
    } else if (op->Type() == "fetch") {
      fetchs.push_back(op);
    }
  }
  // The "col" attribute of a feed/fetch op gives its position among the
  // network's inputs/outputs.
  input_names_.resize(feeds.size());
  output_names_.resize(fetchs.size());
  for (size_t i = 0; i < feeds.size(); i++) {
    input_names_[feeds[i]->GetAttr<int>("col")] =
        feeds[i]->Output("Out").front();
  }
  for (size_t i = 0; i < fetchs.size(); i++) {
    output_names_[fetchs[i]->GetAttr<int>("col")] =
        fetchs[i]->Input("X").front();
  }
}

void LightPredictor::BuildRuntimeProgram(const cpp::ProgramDesc& prog) {
  std::vector<Instruction> insts;
  // 1. Create the ops first.
  Program program(prog, scope_, {});

  // 2. Create the instructions.
#ifdef LITE_WITH_OPENCL
  using WaitListType =
      std::unordered_map<decltype(static_cast<const void*>(nullptr)),
                         std::shared_ptr<cl::Event>>;
  using OpenCLContext = Context<TargetType::kOpenCL>;
  std::unique_ptr<KernelContext> local_ctx(new KernelContext());
  local_ctx->As<OpenCLContext>().InitOnce();
#endif

  // Create the kernels of the target places, and filter out the specific
  // kernel with the target alias.
  for (auto& op : program.ops()) {
    auto kernel_type = op->op_info()->GetAttr<std::string>(kKernelTypeAttr);
    std::string op_type, alias;
    Place place;
    KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place);
    auto kernels = op->CreateKernels({place});
    // Pick the kernel whose alias matches the serialized kernel type.
    auto it = std::find_if(
        kernels.begin(), kernels.end(), [&](std::unique_ptr<KernelBase>& it) {
          return it->alias() == alias;
        });
    CHECK(it != kernels.end());

#ifdef LITE_WITH_OPENCL
    if ((*it)->target() == TARGET(kOpenCL)) {
      // All OpenCL kernels share one local context.
      std::unique_ptr<KernelContext> ctx(new KernelContext());
      (*local_ctx).As<OpenCLContext>().CopySharedTo(&ctx->As<OpenCLContext>());
      (*it)->SetContext(std::move(ctx));
    } else {
      (*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target()));
    }
#else
    (*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target()));
#endif

    insts.emplace_back(op, std::move(*it));
  }
  program_.reset(new RuntimeProgram(std::move(insts)));

  CHECK(program.exec_scope());
  program_->set_exec_scope(program.exec_scope());
}

void LightPredictor::DequantizeWeight() {
// Dequantize a conv2d/depthwise_conv2d weight: one scale per output channel.
#define PROCESS_CONV2D_DATA()                                             \
  for (int64_t i = 0; i < ch; ++i) {                                      \
    for (int64_t j = 0; j < offset; ++j) {                                \
      fp_data[i * offset + j] = scale_list[i] * int_data[i * offset + j]; \
    }                                                                     \
  }

// Dequantize an fc/mul weight: one scale per output channel (column).
#define PROCESS_FC_DATA()                                               \
  for (int64_t i = 0; i < chin; i++) {                                  \
    for (int64_t j = 0; j < chout; j++) {                               \
      fp_data[i * chout + j] = scale_list[j] * int_data[i * chout + j]; \
    }                                                                   \
  }

  auto is_weight_quantized_op = [](const cpp::OpDesc* op_desc) {
    bool result = false;
    if (op_desc->HasAttr("quantization_type")) {
      std::string type = op_desc->GetAttr<std::string>("quantization_type");
      result = (type == "post_weight_abs_max") ||
               (type == "post_weight_channel_wise_abs_max");
    } else {
      result = op_desc->HasAttr("quantize_weight_bits");
    }
    return result;
  };

  Tensor tmp_tensor;
  for (size_t i = 0; i < cpp_program_desc_.BlocksSize(); i++) {
    auto* block = cpp_program_desc_.GetBlock<cpp::BlockDesc>(i);
    for (size_t k = 0; k < block->OpsSize(); ++k) {
      auto* op_desc = block->GetOp<cpp::OpDesc>(k);
      if (is_weight_quantized_op(op_desc)) {
        auto input_names = op_desc->input_vars();
        for (auto& input_name : input_names) {
          std::string input_scale_name = input_name + "_quant_scale";
          if (op_desc->HasAttr(input_scale_name)) {  // the input is quantized
            auto input_tensor =
                scope_->FindVar(input_name)->GetMutable<lite::Tensor>();
            tmp_tensor.CopyDataFrom(*input_tensor);
            auto scale_list =
                op_desc->GetAttr<std::vector<float>>(input_scale_name);

            int quantize_weight_bits =
                op_desc->GetAttr<int>("quantize_weight_bits");
            CHECK(quantize_weight_bits == 8 || quantize_weight_bits == 16);
            float* fp_data = input_tensor->mutable_data<float>();

            std::string op_type = op_desc->Type();
            if (op_type == "conv2d" || op_type == "depthwise_conv2d") {
              int64_t ch = input_tensor->dims()[0];
              int64_t offset = input_tensor->numel() / ch;
              CHECK_EQ(scale_list.size(), ch);
              if (quantize_weight_bits == 8) {
                const int8_t* int_data = tmp_tensor.data<int8_t>();
                PROCESS_CONV2D_DATA()
              } else {
                const int16_t* int_data = tmp_tensor.data<int16_t>();
                PROCESS_CONV2D_DATA()
              }
            } else if (op_type == "fc" || op_type == "mul") {
              int64_t chin = input_tensor->dims()[0];
              int64_t chout = input_tensor->dims()[1];
              CHECK_EQ(scale_list.size(), chout);
              if (quantize_weight_bits == 8) {
                const int8_t* int_data = tmp_tensor.data<int8_t>();
                PROCESS_FC_DATA()
              } else {
                const int16_t* int_data = tmp_tensor.data<int16_t>();
                PROCESS_FC_DATA()
              }
            }
          }
        }
      }
    }
  }

#undef PROCESS_CONV2D_DATA
#undef PROCESS_FC_DATA
}

}  // namespace lite
}  // namespace paddle