// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include #include #include #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/inference/engine.h" #include "paddle/fluid/inference/utils/singleton.h" #include "framework/core/net/net.h" #include "framework/core/types.h" #include "framework/graph/graph.h" #include "framework/graph/graph_global_mem.h" #include "saber/saber_types.h" using anakin::Precision; using anakin::saber::NV; namespace anakin { template class Net; namespace graph { template class Graph; } // namespace graph } // namespace anakin namespace paddle { namespace inference { namespace anakin { template class AnakinEngine { using NetT = ::anakin::Net; using GraphT = ::anakin::graph::Graph; public: explicit AnakinEngine( bool need_summary = false, int device = 0, int max_batch_size = 1, std::map> max_input_shape = {}); ~AnakinEngine(); void InitGraph(); void SetInputShape(const std::string &name, std::vector shape); void AddOp(const std::string &name, const std::string &type, const std::vector &inputs, const std::vector &outputs); template void AddOpAttr(const std::string &op_name, const std::string &attr_name, const T &attr_value) { PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value), "Add operation's attribution."); } NetT *Net() { return net_.get(); } GraphT *Graph() { return graph_.get(); } std::unique_ptr Clone(); const std::map> &GetMaxInputShape() { return max_input_shape_; } void SetMaxInputShape(std::map> shape) { max_input_shape_ = shape; } int GetMaxBatchSize() { return max_batch_size_; } void Freeze(); void Optimize(); void AllocTmpMem() { PADDLE_ENFORCE(net_->alloc_memory_first(*graph_), "anakin alloc temp memory first failed"); } void Save(std::string path) { graph_->save(path); } bool IsInit() { return initialized_; } int GetDevice() { return device_; } void Execute(const std::map &inputs, const std::map &outputs, cudaStream_t stream); private: bool initialized_{false}; int max_batch_size_; std::map> max_input_shape_; int device_; std::unique_ptr graph_; std::unique_ptr net_; }; class AnakinEngineManager { using AnakinNvEngineT = AnakinEngine; public: bool HasEngine(const std::string &name) const { if (engines_.count(name) == 0) return false; return engines_.at(name).get() != nullptr; } AnakinNvEngineT *Get(const std::string &name) const { return engines_.at(name).get(); } AnakinNvEngineT *Create( bool need_summary, int device, int max_batch_size, std::map> max_input_shape, std::string engine_name) { std::unique_lock lk(mut_); auto *p = new AnakinEngine( need_summary, device, max_batch_size, max_input_shape); engines_[engine_name].reset(p); return p; } void DeleteALL() { for (auto &item : engines_) { item.second.reset(nullptr); } } private: std::unordered_map> engines_; std::mutex mut_; }; } // namespace anakin } // namespace inference } // namespace paddle