engine.h 5.6 KB
Newer Older
F
flame 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
27 28 29
// Remove any EXIT macro that is already defined at this point, before the
// Anakin headers below are included.
// NOTE(review): presumably EXIT collides with an identifier used inside those
// headers -- confirm against the Anakin sources.
#ifdef EXIT  // NOLINT
#undef EXIT  // NOLINT
#endif       // NOLINT
F
flame 已提交
30 31 32
#include "framework/core/net/net.h"
#include "framework/core/types.h"
#include "framework/graph/graph.h"
33
#include "framework/graph/graph_global_mem.h"
F
flame 已提交
34 35
#include "saber/saber_types.h"

36 37
// Makes ::anakin::Precision usable as the unqualified name `Precision` at
// global scope, both in this header and in every file that includes it.
using anakin::Precision;

F
flame 已提交
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
// Forward declarations of the two Anakin class templates that the engine
// wrapper below refers to.
namespace anakin {

// Network template: target type, precision, and run type.
template <typename Ttype, Precision Ptype, OpRunType RunType>
class Net;

namespace graph {

// Graph template: target type and precision.
template <typename Ttype, Precision Ptype>
class Graph;

}  // namespace graph
}  // namespace anakin

namespace paddle {
namespace inference {
namespace anakin {

template <typename TargetT, ::anakin::Precision PrecisionType,
          ::anakin::OpRunType RunType = ::anakin::OpRunType::ASYNC>
class AnakinEngine {
56 57 58
  using NetT = ::anakin::Net<TargetT, PrecisionType, RunType>;
  using GraphT = ::anakin::graph::Graph<TargetT, PrecisionType>;

F
flame 已提交
59
 public:
60 61
  explicit AnakinEngine(
      bool need_summary = false, int device = 0, int max_batch_size = 1,
62 63 64
      std::map<std::string, std::vector<int>> max_input_shape = {},
      std::vector<std::string> program_inputs = {},
      bool auto_config_layout = false);
F
flame 已提交
65
  ~AnakinEngine();
66
  void InitNet();
F
flame 已提交
67 68 69 70 71 72 73 74 75 76 77
  void SetInputShape(const std::string &name, std::vector<int> shape);
  void AddOp(const std::string &name, const std::string &type,
             const std::vector<std::string> &inputs,
             const std::vector<std::string> &outputs);

  template <typename T>
  void AddOpAttr(const std::string &op_name, const std::string &attr_name,
                 const T &attr_value) {
    PADDLE_ENFORCE(graph_->AddOpAttr(op_name, attr_name, attr_value),
                   "Add operation's attribution.");
  }
78
  NetT *Net() { return net_.get(); }
79
  GraphT *Graph() { return graph_.get(); }
F
flame 已提交
80
  std::unique_ptr<AnakinEngine> Clone();
81 82 83 84 85 86
  const std::map<std::string, std::vector<int>> &GetMaxInputShape() {
    return max_input_shape_;
  }
  void SetMaxInputShape(std::map<std::string, std::vector<int>> shape) {
    max_input_shape_ = shape;
  }
87 88 89 90 91 92
  const std::vector<std::string> &GetScalableInputs() {
    return program_inputs_;
  }
  void SetScalableInputs(std::vector<std::string> program_inputs) {
    program_inputs_ = program_inputs;
  }
93
  int GetMaxBatchSize() { return max_batch_size_; }
F
flame 已提交
94 95
  void Freeze();
  void Optimize();
96
  void RegistBlock(::anakin::PBlock<TargetT> *block_p);
97
  void Save(std::string path) { graph_->save(path); }
98
  bool IsInit() { return initialized_; }
99
  int GetDevice() { return device_; }
100 101 102 103 104 105 106 107 108
  void AddTensorScale(const std::string &tensor_name, float scale) {
    tensor_scales_[tensor_name] = scale;
  }
  std::unordered_map<std::string, float> GetTensorScales() {
    return tensor_scales_;
  }
  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
               const std::map<std::string, framework::LoDTensor *> &outputs);
#ifdef PADDLE_WITH_CUDA
F
flame 已提交
109
  void Execute(const std::map<std::string, framework::LoDTensor *> &inputs,
110 111
               const std::map<std::string, framework::LoDTensor *> &outputs,
               cudaStream_t stream);
112 113 114 115
#endif

 private:
  void BindInput(const std::map<std::string, framework::LoDTensor *> &inputs);
F
flame 已提交
116 117

 private:
118
  bool initialized_{false};
119
  int device_;
120
  int max_batch_size_;
121
  std::map<std::string, std::vector<int>> max_input_shape_;
122
  std::vector<std::string> program_inputs_;
F
flame 已提交
123 124
  std::unique_ptr<GraphT> graph_;
  std::unique_ptr<NetT> net_;
125
  static std::once_flag init_anakin_;
126 127 128
  std::unordered_map<std::string, float> tensor_scales_;
  // Always be false in gpu mode but true in most cpu cases.
  bool auto_config_layout_;
F
flame 已提交
129 130
};

131
template <typename TargetT, ::anakin::Precision PrecisionType>
132
class AnakinEngineManager {
133
  using AnakinEngineT = AnakinEngine<TargetT, PrecisionType>;
134 135 136 137 138 139

 public:
  bool HasEngine(const std::string &name) const {
    if (engines_.count(name) == 0) return false;
    return engines_.at(name).get() != nullptr;
  }
140
  AnakinEngineT *Get(const std::string &name) const {
141 142 143
    return engines_.at(name).get();
  }

144 145 146 147
  AnakinEngineT *Create(bool need_summary, int device, int max_batch_size,
                        std::map<std::string, std::vector<int>> max_input_shape,
                        std::vector<std::string> program_inputs,
                        bool auto_config_layout, std::string engine_name) {
148
    std::unique_lock<std::mutex> lk(mut_);
149 150 151
    auto *p = new AnakinEngine<TargetT, PrecisionType>(
        need_summary, device, max_batch_size, max_input_shape, program_inputs,
        auto_config_layout);
152 153 154 155 156 157 158 159 160 161 162
    engines_[engine_name].reset(p);
    return p;
  }

  void DeleteALL() {
    for (auto &item : engines_) {
      item.second.reset(nullptr);
    }
  }

 private:
163
  std::unordered_map<std::string, std::unique_ptr<AnakinEngineT>> engines_;
164 165
  std::mutex mut_;
};
F
flame 已提交
166 167 168
}  // namespace anakin
}  // namespace inference
}  // namespace paddle