// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"

#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"
#include "framework/graph/graph_global_mem.h"
#include "saber/saber_types.h"

using anakin::Precision;

namespace anakin {

template <typename, Precision, OpRunType>
class Net;

namespace graph {
template <typename, Precision>
class Graph;
}  // namespace graph
}  // namespace anakin

namespace paddle {
namespace inference {
namespace anakin {

// Wraps an Anakin Graph/Net pair: ops and attributes are added to the graph,
// which is then frozen, optimized, and compiled into an executable net.
template <typename TargetT, ::anakin::Precision PrecisionType,
          ::anakin::OpRunType RunType = ::anakin::OpRunType::ASYNC>
class AnakinEngine {
  using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
  using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;

 public:
  explicit AnakinEngine(
      bool need_summary = false, int device = 0, int max_batch_size = 1,
      std::map<std::string, std::vector<int>> max_input_shape = {},
      std::vector<std::string> program_inputs = {},
      bool auto_config_layout = false);
  ~AnakinEngine();
  void InitNet();
  void SetInputShape(const std::string &name, std::vector<int> shape);
  void AddOp(const std::string &name, const std::string &type,
             const std::vector<std::string> &inputs,
             const std::vector<std::string> &outputs);

  template <typename T>
  void AddOpAttr(const std::string &op_name, const std::string &attr_name,
                 const T &attr_value) {
    PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value),
                   "Failed to add the attribute of an operator.");
  }

  NetT *Net() { return net_.get(); }
  GraphT *Graph() { return graph_.get(); }
  std::unique_ptr<AnakinEngine> Clone();
  const std::map<std::string, std::vector<int>> &GetMaxInputShape() {
    return max_input_shape_;
  }
  void SetMaxInputShape(std::map<std::string, std::vector<int>> shape) {
    max_input_shape_ = shape;
  }
  const std::vector<std::string> &GetScalableInputs() {
    return program_inputs_;
  }
  void SetScalableInputs(std::vector<std::string> program_inputs) {
    program_inputs_ = program_inputs;
  }
  int GetMaxBatchSize() { return max_batch_size_; }
  void Freeze();
  void Optimize();
  void RegistBlock(::anakin::PBlock<TargetT> *block_p);
  void Save(std::string path) { graph_->save(path); }
  bool IsInit() { return initialized_; }
  int GetDevice() { return device_; }
  void AddTensorScale(const std::string &tensor_name, float scale) {
    tensor_scales_[tensor_name] = scale;
  }
  std::unordered_map<std::string, float> GetTensorScales() {
    return tensor_scales_;
  }
  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
               const std::map<std::string, framework::LoDTensor *> &outputs);
#ifdef PADDLE_WITH_CUDA
  // Asynchronous variant that runs on the given CUDA stream.
  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
               const std::map<std::string, framework::LoDTensor *> &outputs,
               cudaStream_t stream);
#endif

 private:
  void BindInput(const std::map<std::string, framework::LoDTensor *> &inputs);

 private:
  bool initialized_{false};
  int device_;
  int max_batch_size_;
  std::map<std::string, std::vector<int>> max_input_shape_;
  std::vector<std::string> program_inputs_;
  std::unique_ptr<GraphT> graph_;
  std::unique_ptr<NetT> net_;
  static std::once_flag init_anakin_;
  std::unordered_map<std::string, float> tensor_scales_;
  // Always false in GPU mode, but true in most CPU cases.
  bool auto_config_layout_;
};

// Thread-safe registry that creates and owns named AnakinEngine instances.
template <typename TargetT, ::anakin::Precision PrecisionType>
class AnakinEngineManager {
  using AnakinEngineT = AnakinEngine<TargetT, PrecisionType>;

 public:
  bool HasEngine(const std::string &name) const {
    if (engines_.count(name) == 0) return false;
    return engines_.at(name).get() != nullptr;
  }
  AnakinEngineT *Get(const std::string &name) const {
    return engines_.at(name).get();
  }

  AnakinEngineT *Create(bool need_summary, int device, int max_batch_size,
                        std::map<std::string, std::vector<int>> max_input_shape,
                        std::vector<std::string> program_inputs,
                        bool auto_config_layout, std::string engine_name) {
    std::unique_lock<std::mutex> lk(mut_);
    auto *p = new AnakinEngine<TargetT, PrecisionType>(
        need_summary, device, max_batch_size, max_input_shape, program_inputs,
        auto_config_layout);
    engines_[engine_name].reset(p);
    return p;
  }

  void DeleteALL() {
    for (auto &item : engines_) {
      item.second.reset(nullptr);
    }
  }

 private:
  std::unordered_map<std::string, std::unique_ptr<AnakinEngineT>> engines_;
  std::mutex mut_;
};

}  // namespace anakin
}  // namespace inference
}  // namespace paddle
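
// Illustrative usage sketch (comments only, not part of this header): one way
// a caller might obtain and drive an engine through AnakinEngineManager. The
// template arguments ::anakin::saber::NV and ::anakin::Precision::FP32 are
// assumptions for a CUDA build; substitute the target/precision your build
// actually uses. The tensor name "input" and the shape are placeholders.
//
//   using paddle::inference::anakin::AnakinEngineManager;
//   AnakinEngineManager<::anakin::saber::NV, ::anakin::Precision::FP32>
//       manager;
//   auto *engine = manager.Create(
//       /*need_summary=*/false, /*device=*/0, /*max_batch_size=*/1,
//       /*max_input_shape=*/{{"input", {1, 3, 224, 224}}},
//       /*program_inputs=*/{"input"}, /*auto_config_layout=*/false,
//       /*engine_name=*/"example_engine");
//   // Populate the graph with AddOp()/AddOpAttr(), then:
//   engine->Freeze();
//   engine->Optimize();
//   engine->InitNet();
//   // engine->Execute(inputs, outputs);  // LoDTensor maps keyed by name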