anakin_subgraph_pass.cc 8.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
16
#include <map>
17 18 19 20 21 22 23 24
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
25
#include "paddle/fluid/inference/anakin/convert/op_converter.h"
26 27 28 29 30 31 32 33 34 35 36 37
#include "paddle/fluid/inference/anakin/op_teller.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/ir_passes/anakin_subgraph_pass.h"
#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
#include "paddle/fluid/string/pretty_log.h"

namespace paddle {
namespace inference {
namespace analysis {

using framework::ir::Node;

38 39
// Entry point of the pass.
//
// Steps, in order:
//   1. Run SubGraphFuser over `graph` with a teller backed by
//      anakin::OpTeller.
//   2. For every op node whose Agent carries a non-empty sub-graph, call
//      CreateAnakinOp() on it and remove the sub-graph's nodes from the graph.
//   3. Remove every remaining op node whose Agent reports deleted().
//   4. Store the collected parameter names on the graph under
//      framework::ir::kRepetitiveParamAttr.
void analysis::AnakinSubgraphPass::ApplyImpl(
    framework::ir::Graph *graph) const {
  framework::ir::FusePassBase::Init("anakin_subgraph_pass", graph);

  // A node is accepted only when it is an op with an OpDesc attached and the
  // Anakin op teller accepts its type/description.
  auto teller = [](const framework::ir::Node *node) {
    if (!node->IsOp() || !node->Op()) return false;
    return anakin::OpTeller::Global().Tell(node->Op()->Type(), *node->Op());
  };

  SubGraphFuser fuser(graph, teller, 6 /* min_subgraph_size */);
  fuser();

  std::vector<std::string> graph_param_names =
      ExtractParameters(graph->Nodes());

  // Those parameters already exist in anakin, and should not have another
  // copy in fluid.
  std::vector<std::string> repetitive_params;

  for (auto *node : graph->Nodes()) {
    if (node->IsOp() && !Agent(node).subgraph()->empty()) {
      CreateAnakinOp(node, graph, graph_param_names, &repetitive_params);
      // The nodes of the sub-graph are no longer needed once the engine op
      // has been created from them.
      std::unordered_set<const Node *> nodes2remove(
          Agent(node).subgraph()->begin(), Agent(node).subgraph()->end());
      framework::ir::GraphSafeRemoveNodes(graph, nodes2remove);
    }
  }

  // Second sweep: drop whatever op nodes are still flagged as deleted.
  std::unordered_set<const Node *> nodes2remove;
  for (auto *node : graph->Nodes()) {
    if (node->IsOp() && Agent(node).deleted()) {
      nodes2remove.insert(node);
    }
  }
  framework::ir::GraphSafeRemoveNodes(graph, nodes2remove);

  // NOTE(review): the vector is heap-allocated and handed to Graph::Set;
  // presumably the graph takes ownership of it -- confirm against Graph::Set.
  graph->Set(framework::ir::kRepetitiveParamAttr,
             new std::vector<std::string>(repetitive_params));
}

77 78 79
// Builds the key that identifies an Anakin engine.
//
//   engine_inputs  - names of the engine inputs (iterated in set order).
//   engine_outputs - names of the engine outputs (iterated in set order).
//   id             - extra discriminator appended after the names.
//
// Returns the decimal string of std::hash<std::string> applied to the
// concatenation of all components.
//
// Every name is terminated by '#' and each of the two groups is terminated by
// '|'. Without these separators, different partitions of the same characters
// (e.g. inputs {"ab"} / outputs {"c"} versus inputs {"a"} / outputs {"bc"})
// would produce the same string and therefore the same key.
// NOTE(review): this assumes names never contain '#' or '|' -- confirm.
std::string GenerateAnakinEngineKey(const std::set<std::string> &engine_inputs,
                                    const std::set<std::string> &engine_outputs,
                                    std::string id) {
  std::string engine_hash_key;
  for (const auto &name : engine_inputs) {
    engine_hash_key += name;
    engine_hash_key += '#';
  }
  engine_hash_key += '|';
  for (const auto &name : engine_outputs) {
    engine_hash_key += name;
    engine_hash_key += '#';
  }
  engine_hash_key += '|';
  engine_hash_key += id;
  return std::to_string(std::hash<std::string>()(engine_hash_key));
}

92 93 94 95
// Rewrites the op node `node` into an "anakin_engine" op and builds the
// matching Anakin engine from the sub-graph attached to the node.
//
//   node              - op node whose Agent carries a non-empty sub-graph;
//                       its OpDesc is rewritten in place.
//   graph             - graph containing `node`; only read here, to index its
//                       variable nodes by name.
//   graph_params      - names treated as parameters. Inputs of `node` whose
//                       name appears in this list are passed to the engine as
//                       parameters.
//   repetitive_params - output; those parameter names are appended to it.
//
// Fails a PADDLE_ENFORCE when the sub-graph is empty, when an output name has
// no renamed counterpart, when the serialized block has no var-desc, or when
// there is no output mapping at all.
void AnakinSubgraphPass::CreateAnakinOp(
    framework::ir::Node *node, Graph *graph,
    const std::vector<std::string> &graph_params,
    std::vector<std::string> *repetitive_params) const {
  auto *op_desc = node->Op();
  auto &subgraph = *Agent(node).subgraph();
  PADDLE_ENFORCE(!subgraph.empty());

  framework::ProgramDesc *program_desc =
      Get<framework::ProgramDesc *>("program");
  // Add a new block to the program for the anakin engine op.
  const framework::BlockDesc &main_block =
      program_desc->Block(framework::kRootBlockIndex);
  framework::BlockDesc *new_block = program_desc->AppendBlock(main_block);

  // A fake block desc; it is serialized into the "subgraph" attribute below.
  framework::proto::BlockDesc block_proto;
  framework::BlockDesc block_desc(nullptr, &block_proto);
  block_desc.Proto()->set_parent_idx(-1);
  block_desc.Proto()->set_idx(0);
  string::PrettyLogDetail("---  detect a sub-graph with %d nodes",
                          subgraph.size());

  // Copy every op of the sub-graph into both the new program block and the
  // fake block.
  for (auto *sub_node : subgraph) {
    auto *new_block_op = new_block->AppendOp();
    auto *op = block_desc.AppendOp();
    *new_block_op->Proto() = *sub_node->Op()->Proto();
    *op->Proto() = *sub_node->Op()->Proto();
  }

  // Then, we will use the input_names_with_id and output_names_with_id to
  // generate the engine key.
  // So, we use set instead of unordered_set here to ensure that the engine key
  // is unique.
  std::set<std::string> input_names;
  std::set<std::string> input_names_with_id;
  std::vector<std::string> params;
  for (auto *x : node->inputs) {
    input_names.insert(x->Name());
    input_names_with_id.insert(x->Name() + std::to_string(x->id()));
    if (std::find(graph_params.begin(), graph_params.end(), x->Name()) !=
        graph_params.end()) {
      params.push_back(x->Name());
    }
  }
  repetitive_params->insert(repetitive_params->end(), params.begin(),
                            params.end());
  op_desc->SetInput(
      "Xs", std::vector<std::string>(input_names.begin(), input_names.end()));

  std::set<std::string> output_names;
  std::set<std::string> output_names_with_id;
  for (auto *x : node->outputs) {
    output_names.insert(x->Name());
    output_names_with_id.insert(x->Name() + std::to_string(x->id()));
  }

  op_desc->SetOutput(
      "Ys", std::vector<std::string>(output_names.begin(), output_names.end()));
  op_desc->SetType("anakin_engine");

  std::unordered_map<std::string, std::string> output_name_map;
  std::unordered_map<std::string, framework::ir::Node *> graph_var_map;

  // Index the graph's variable nodes (those with a VarDesc) by name.
  for (framework::ir::Node *graph_node : graph->Nodes()) {
    if (graph_node->IsVar() && graph_node->Var()) {
      graph_var_map[graph_node->Name()] = graph_node;
    }
  }

  // The following procedure is used to rename all the intermediate
  // variables and the output variables of the subgraph.
  RenameAndGetOutputs(subgraph, &block_desc, input_names_with_id,
                      &output_names_with_id, &output_names, &output_name_map,
                      graph_var_map, false);

  // When anakin engine runs at the end of the operation,
  // output_mapping help us copy the data from the renamed ITensor
  // to Tensor.
  std::vector<std::string> output_mapping;
  for (const auto &name : output_names) {
    PADDLE_ENFORCE(output_name_map.count(name) != 0);
    output_mapping.push_back(output_name_map[name]);
  }

  PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(),
                 "the block has no var-desc");
  PADDLE_ENFORCE(!output_mapping.empty());
  op_desc->SetBlockAttr("sub_block", new_block);
  SetAttr(op_desc->Proto(), "subgraph",
          block_desc.Proto()->SerializeAsString());
  // Set attrs
  SetAttr(op_desc->Proto(), "parameters", params);
  SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping);
  int predictor_id = Get<int>("predictor_id");
  auto engine_key = GenerateAnakinEngineKey(
      input_names_with_id, output_names_with_id, std::to_string(predictor_id));

  SetAttr(op_desc->Proto(), "engine_key", engine_key);
  auto max_input_shape =
      Get<std::map<std::string, std::vector<int>>>("max_input_shape");
  auto max_batch_size = Get<int>("max_batch_size");

  // Create the engine under the same key that was just stored on the op.
  auto *anakin_engine =
      inference::Singleton<anakin::AnakinEngineManager>::Global().Create(
          true, Get<int>("gpu_device_id"), max_batch_size, max_input_shape,
          engine_key);

  auto *scope = param_scope();
  std::unordered_set<std::string> param_set(params.begin(), params.end());
  framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());

  inference::Singleton<inference::anakin::AnakinOpConverter>::Global()
      .ConvertBlockToAnakinEngine(
          &block_desc_temp, scope,
          std::vector<std::string>(input_names.begin(), input_names.end()),
          param_set, output_mapping, anakin_engine);
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle

// Expose AnakinSubgraphPass through the REGISTER_PASS macro under the name
// `anakin_subgraph_pass` (the same name ApplyImpl passes to
// FusePassBase::Init).
REGISTER_PASS(anakin_subgraph_pass,
              paddle::inference::analysis::AnakinSubgraphPass);