// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "lite/core/op_lite.h"
#include "lite/core/tensor.h"
#include "lite/kernels/mlu/bridges/tensor.h"

namespace paddle {
namespace lite {
namespace subgraph {
namespace mlu {

// The context of the converters, used for converting the ops of a subgraph
// into the MLU IR graph.
class Graph {
 public:
  Graph() { CNML_CALL(cnmlCreateFusionOp(&fusion_op_)); }

  ~Graph() {
    CNML_CALL(cnmlDestroyFusionOp(&fusion_op_));
    for (auto op : ops_) {
      CNML_CALL(cnmlDestroyBaseOp(&op));
    }
  }

  // Data node
  std::shared_ptr<MLUTensor> AddNode(
      const std::string& name,
      std::vector<int64_t> shape,
      cnmlTensorType_t tensor_type = CNML_TENSOR,
      cnmlDataOrder_t data_order = CNML_NCHW,
      cnmlDataType_t mlu_dtype = CNML_DATA_FLOAT32,
      void* raw_ptr = nullptr);

  std::shared_ptr<MLUTensor> GetNode(const std::string& name) {
    CHECK(HasNode(name)) << "[MLU] Node " << name << " not found.";
    return nodes_.at(name);
  }

  bool HasNode(const std::string& name) {
    return nodes_.find(name) != nodes_.end();
  }

  void AddInput(std::shared_ptr<MLUTensor> tensor) {
    inputs_.push_back(tensor->mlu_tensor());
    input_tensors_.push_back(tensor);
  }

  void AddOutput(std::shared_ptr<MLUTensor> tensor) {
    outputs_.push_back(tensor->mlu_tensor());
    output_tensors_.push_back(tensor);
  }

  // Add a CNML op to the fusion op of this graph.
  void FuseOp(cnmlBaseOp_t op) { CNML_CALL(cnmlFuseOp(op, fusion_op_)); }

  // Bind the fusion op's inputs/outputs, compile it for the target core
  // configuration, and record the MLU device addresses of the I/O tensors.
  void Compile(cnmlCoreVersion_t core_version, int core_number) {
    CNML_CALL(cnmlSetFusionIO(fusion_op_,
                              inputs_.data(),
                              inputs_.size(),
                              outputs_.data(),
                              outputs_.size()));
    CNML_CALL(cnmlSetFusionOpCorenum(fusion_op_, core_number));
    CNML_CALL(cnmlSetFusionOpCoreVersion(fusion_op_, core_version));
    CNML_CALL(cnmlCompileFusionOp_V2(fusion_op_));
    for (auto in : input_tensors_) {
      input_addrs_.push_back(in->mlu_data());
    }
    for (auto out : output_tensors_) {
      output_addrs_.push_back(out->mlu_data());
    }
  }

  // Launch the compiled fusion op on the given queue and wait for completion.
  void Compute(cnrtInvokeFuncParam_t forward_param, cnrtQueue_t que) {
    CNML_CALL(cnmlComputeFusionOpForward_V3(fusion_op_,
                                            input_addrs_.data(),
                                            input_addrs_.size(),
                                            output_addrs_.data(),
                                            output_addrs_.size(),
                                            &forward_param,
                                            que));
    CNRT_CALL(cnrtSyncQueue(que));
  }

  // Bind host-side constant data (e.g. weights) to the named graph tensor,
  // converting to fp16 first when the graph's floating-point type requires it.
  void BindConstData(std::string tensor_name, ::paddle::lite::Tensor* tensor) {
    const float* data = tensor->data<float>();
    size_t len = tensor->data_size();
    if (fp_type_ == CNML_DATA_FLOAT32) {
      CNML_CALL(cnmlBindConstData_V2(
          nodes_[tensor_name]->mlu_tensor(),
          const_cast<void*>(static_cast<const void*>(data)),
          false));
    } else if (fp_type_ == CNML_DATA_FLOAT16) {
      auto* data_fp16 = tensor->mutable_data<::paddle::lite::fluid::float16>();
      for (size_t i = 0; i < len; ++i) {
        data_fp16[i] = static_cast<::paddle::lite::fluid::float16>(data[i]);
      }
      CNML_CALL(cnmlBindConstData_V2(nodes_[tensor_name]->mlu_tensor(),
                                     static_cast<void*>(data_fp16),
                                     false));
    } else {
      CHECK(0);
    }
  }

  // Set the quantized computing data type (INT8 by default) of a tensor in
  // the given op, using the provided quantization scale.
  void SetComputingDataType(cnmlBaseOp_t op,
                            cnmlTensor_t tensor,
                            float scale,
                            cnmlDataType_t data_type = CNML_DATA_INT8) {
    cnmlQuantizedParam_t quant_param;
    CNML_CALL(
        cnmlCreateQuantizedParam(&quant_param, scale2position(scale), 1, 0.0));
    CNML_CALL(
        cnmlSetOperationComputingDataType(op, tensor, data_type, quant_param));
    CNML_CALL(cnmlDestroyQuantizedParam(&quant_param));
  }

  void SetFPType(::paddle::lite_api::PrecisionType type) {
    switch (type) {
      case ::paddle::lite_api::PrecisionType::kFP16:
        fp_type_ = CNML_DATA_FLOAT16;
        break;
      case ::paddle::lite_api::PrecisionType::kFloat:
        fp_type_ = CNML_DATA_FLOAT32;
        break;
      default:
        CHECK(0);
    }
  }

  cnmlDataType_t FPType() { return fp_type_; }

 private:
  cnmlDataType_t fp_type_{CNML_DATA_FLOAT32};
  std::unordered_map<std::string, std::shared_ptr<MLUTensor>> nodes_;
  std::vector<cnmlTensor_t> inputs_;
  std::vector<cnmlTensor_t> outputs_;
  std::vector<void*> input_addrs_;
  std::vector<void*> output_addrs_;
  std::vector<std::shared_ptr<MLUTensor>> input_tensors_;
  std::vector<std::shared_ptr<MLUTensor>> output_tensors_;
  std::vector<cnmlBaseOp_t> ops_;
  cnmlFusionOp_t fusion_op_;
};

}  // namespace mlu
}  // namespace subgraph
}  // namespace lite
}  // namespace paddle
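
// Usage sketch (illustrative only): a converter typically adds data nodes,
// fuses the CNML ops it builds into the graph, compiles the fusion op once,
// and then runs it on a CNRT queue. The shapes, tensor names, the
// `CreateSomeCnmlOp` helper, and the chosen core version below are
// hypothetical placeholders, not part of this header:
//
//   using paddle::lite::subgraph::mlu::Graph;
//
//   Graph graph;
//   graph.SetFPType(paddle::lite_api::PrecisionType::kFloat);
//
//   auto x = graph.AddNode("x", {1, 3, 224, 224});       // input data node
//   auto y = graph.AddNode("y", {1, 64, 112, 112});      // output data node
//   graph.AddInput(x);
//   graph.AddOutput(y);
//
//   cnmlBaseOp_t op = CreateSomeCnmlOp(x, y);            // hypothetical helper
//   graph.FuseOp(op);                                    // add op to fusion op
//
//   graph.Compile(core_version, /*core_number=*/1);      // compile once
//   graph.Compute(forward_param, queue);                 // launch + sync queue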