// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <memory>
#include <string>
#include <vector>
#include "graph/tensor.h"
#include "lite/backends/huawei_ascend_npu/device.h"
#include "lite/core/kernel.h"
#include "lite/kernels/npu/bridges/engine.h"
#include "lite/kernels/npu/bridges/registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace huawei_ascend_npu {

using TensorDesc = paddle::lite::huawei_ascend_npu::TensorDesc;
using AclModelClient = paddle::lite::huawei_ascend_npu::AclModelClient;

// Holds one device model built for a specific combination of input shapes:
// its generated name, the ACL model client that executes it, and the cached
// descriptions of its device-side input/output tensors.
class DeviceProgram {
 public:
  DeviceProgram() {}
  ~DeviceProgram() {}
  // Derives a unique model name from the subgraph I/O names and the input
  // dimensions, so each input-shape combination maps to its own model file.
  std::string GenerateModelName(
      const std::vector<std::string>& input_names,
      const std::vector<std::string>& output_names,
      const std::vector<std::vector<int64_t>>& origin_idims);
  // Tries to restore a previously built model from model_cache_dir.
  bool LoadFromCacheFile(const std::vector<std::string>& input_names,
                         const std::vector<std::string>& output_names,
                         const std::vector<std::vector<int64_t>>& origin_idims,
                         const std::string& model_cache_dir,
                         const int device_id);
  // Converts the subgraph ops into a device graph, builds the model, and
  // writes it back to model_cache_dir for reuse in later runs.
  bool BuildGraphAndCacheToFile(
      RuntimeProgram* origin_program,
      const std::vector<std::string>& input_names,
      const std::vector<std::string>& output_names,
      const std::vector<std::vector<int64_t>>& origin_idims,
      const std::vector<Tensor*>& origin_otensors,
      const std::string& model_cache_dir,
      const int device_id);
  // Binds the origin (host) tensors to the device tensors so inputs and
  // outputs can be exchanged without extra copies.
  bool ShareBufferWithOriginTensors(
      const std::vector<std::string>& input_names,
      const std::vector<std::string>& output_names,
      std::vector<Tensor*>* origin_itensors,
      std::vector<Tensor*>* origin_otensors,
      std::vector<std::shared_ptr<ge::Tensor>>* device_itensors,
      std::vector<std::shared_ptr<ge::Tensor>>* device_otensors);
  bool SharedBufferWithOutputTensors(
      const std::vector<std::string>& output_names,
      std::vector<Tensor*>* origin_otensors,
      std::vector<std::shared_ptr<ge::Tensor>>* device_otensors);
  // Executes the loaded model directly on the shared buffers.
  bool ZeroCopyRun(std::vector<std::shared_ptr<ge::Tensor>>* device_itensors,
                   std::vector<std::shared_ptr<ge::Tensor>>* device_otensors);

 public:
  std::string model_name_{""};
  std::shared_ptr<AclModelClient> model_client_{nullptr};
  std::vector<std::vector<int64_t>> origin_odims_;
  std::vector<PrecisionType> origin_otypes_;
  std::vector<TensorDesc> device_idims_{};
  std::vector<TensorDesc> device_odims_{};
};

class SubgraphEngine : public subgraph::Engine {
 public:
  SubgraphEngine(KernelContext* ctx,
                 int block_idx,
                 const std::shared_ptr<const cpp::ProgramDesc>& program_desc,
                 Scope* exec_scope,
                 const std::vector<std::string>& input_names,
                 const std::vector<std::string>& output_names)
      : subgraph::Engine(ctx,
                         block_idx,
                         program_desc,
                         exec_scope,
                         input_names,
                         output_names) {}

 protected:
  bool PrepareWorkspaceForDeviceProgram() override;
  bool BuildDeviceProgram() override;
  bool LaunchDeviceProgram() override;

 private:
  std::vector<std::shared_ptr<ge::Tensor>> device_itensors_{};
  std::vector<std::shared_ptr<ge::Tensor>> device_otensors_{};
  // Caches one DeviceProgram per input-shape combination, keyed by the
  // input dimensions, so models are not rebuilt when shapes repeat.
  std::map<std::vector<std::vector<int64_t>>, std::shared_ptr<DeviceProgram>>
      device_programs_;
};

class SubgraphCompute
    : public KernelLite<TARGET(kHuaweiAscendNPU), PRECISION(kAny)> {
 public:
  using param_t = operators::SubgraphParam;

  void PrepareForRun() override;

  void Run() override;

  virtual ~SubgraphCompute() = default;

 private:
  std::unique_ptr<SubgraphEngine> engine_;
};

}  // namespace huawei_ascend_npu
}  // namespace kernels
}  // namespace lite
}  // namespace paddle