cinn_compiler.cc 8.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h"

17
#include <cstdint>
18
#include <iterator>
19 20 21
#include <map>
#include <memory>
#include <string>
22
#include <unordered_map>
23 24 25 26 27 28 29 30 31 32 33

#include "cinn/common/target.h"
#include "cinn/common/type.h"
#include "cinn/frontend/decomposer/use_decomposer.h"
#include "cinn/frontend/pass/use_program_pass.h"
#include "cinn/frontend/program_pass.h"
#include "cinn/frontend/syntax.h"
#include "cinn/hlir/framework/graph.h"
#include "cinn/hlir/framework/graph_compiler.h"
#include "cinn/hlir/framework/pass.h"
#include "cinn/hlir/pass/use_pass.h"
34
#include "paddle/fluid/framework/framework.pb.h"
35 36
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
37
#include "paddle/fluid/framework/ir/node.h"
38 39 40
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h"
#include "paddle/fluid/framework/program_desc.h"
41
#include "paddle/fluid/framework/rw_lock.h"
42
#include "paddle/fluid/framework/tensor.h"
43
#include "paddle/fluid/inference/analysis/dot.h"
44
#include "paddle/fluid/operators/cinn/cinn_launch_context.h"
45
#include "paddle/fluid/platform/enforce.h"
46
#include "paddle/fluid/string/string_helper.h"
47 48 49 50 51 52

namespace paddle {
namespace framework {
namespace paddle2cinn {

using ir::Graph;
53 54
using ir::Node;
using inference::analysis::Dot;
55 56 57 58 59 60 61 62 63 64 65 66
using ::cinn::common::Target;
using ::cinn::common::Float;
using ::cinn::hlir::framework::GraphCompiler;
using ::cinn::hlir::framework::BuildScope;
using ::cinn::frontend::ProgramPass;
using ::cinn::hlir::framework::ApplyPass;

// Returns the process-wide CinnCompiler.
//
// The object is a function-local static, so it is constructed the first time
// this function is called and the same address is returned on every call.
CinnCompiler* CinnCompiler::GetInstance() {
  static CinnCompiler singleton;
  return &singleton;
}

67 68 69
// Returns the compiled object for `graph`, compiling it on a cache miss.
//
// Lookup proceeds in three stages:
//   1. the address-keyed cache (built from graph, input tensors and target
//      arch string);
//   2. the structure-keyed cache; on a hit the address key is registered as
//      an alias of the existing entry so the next query takes stage 1;
//   3. a fresh compilation through CompileGraph, whose result is published
//      in both caches.
//
// Locking: stage 1 only reads and therefore takes the read lock. Every path
// that inserts into a cache takes the write lock, so the maps are never
// mutated while other readers may be traversing them. Compilation itself
// runs without any lock held; if two threads compile the same structure
// concurrently, the first published result wins and the other is discarded.
//
// \param graph          the subgraph to compile.
// \param input_tensors  input tensors by variable name; part of both keys.
// \param target         CINN target; its arch string is part of both keys.
// \param stream         opaque stream handle forwarded to CompileGraph.
// \return a reference to the cached compiled object. The reference is handed
//         out after the lock is released, so it is only valid as long as the
//         entry stays in the cache (Clear() empties the caches).
const CinnCompiledObject& CinnCompiler::Compile(
    const Graph& graph,
    const std::map<std::string, const LoDTensor*>& input_tensors,
    const Target& target, void* stream) {
  VLOG(1) << "-- The graph to be compiled is:\n" << VizGraph(graph);
  CinnCacheKeyByAddress cur_key_by_address(graph, input_tensors,
                                           target.arch_str());
  {
    // Stage 1: pure lookup, the shared lock is sufficient.
    AutoRDLock r_guard{&rwlock_};
    auto found = cache_by_address_.find(cur_key_by_address);
    if (found != cache_by_address_.end()) {
      return *found->second;
    }
  }

  // The address lookup missed: generate the structure cache key. This only
  // touches the local key object, so it is done outside of any lock.
  CinnCacheKeyByStructure cur_key_by_struct;
  cur_key_by_struct.SetKey(graph, input_tensors, target.arch_str());
  {
    // Stage 2: a hit here inserts into cache_by_address_, so the exclusive
    // lock is required.
    AutoWRLock w_guard{&rwlock_};
    auto found = cache_by_struct_.find(cur_key_by_struct);
    if (found != cache_by_struct_.end()) {
      auto* cached_obj = found->second.get();
      cache_by_address_[cur_key_by_address] = cached_obj;
      return *cached_obj;
    }
  }

  // Stage 3: compile without holding the lock.
  std::int64_t compiled_num = real_compiled_num_.fetch_add(1);
  auto compiled_res =
      CompileGraph(graph, input_tensors, target, compiled_num, stream);

  AutoWRLock w_guard{&rwlock_};
  // Another thread may have published the same structure while this one was
  // compiling; keep the existing entry in that case.
  if (cache_by_struct_.count(cur_key_by_struct) == 0) {
    cache_by_struct_[cur_key_by_struct] = std::move(compiled_res);
  }
  // Always register the address key, whichever thread won the race, so the
  // returned pointer is never null.
  auto* cached_obj = cache_by_struct_.at(cur_key_by_struct).get();
  cache_by_address_[cur_key_by_address] = cached_obj;
  return *cached_obj;
}

// Overload that identifies the graph by the key returned from AddGraph.
//
// The key is resolved with FindGraph (which raises when the key is unknown)
// and the call is forwarded to the Graph-based overload with the remaining
// arguments unchanged.
const CinnCompiledObject& CinnCompiler::Compile(
    const std::string& compilation_key,
    const std::map<std::string, const LoDTensor*>& input_tensors,
    const Target& target, void* stream) {
  return Compile(FindGraph(compilation_key), input_tensors, target, stream);
}

118 119 120 121 122
std::string CinnCompiler::AddGraph(std::unique_ptr<Graph> graph) {
  std::string graph_key;
  ProgramDesc program;
  GraphToProgram(*graph, &program);
  program.Proto()->SerializeToString(&graph_key);
123 124 125 126 127 128 129 130 131

  PADDLE_ENFORCE_EQ(
      graphs_.count(graph_key), 0,
      platform::errors::PreconditionNotMet(
          "The graph to be added is already in CinnCompiler, which is:\n",
          VizGraph(graph_key).c_str()));
  graphs_[graph_key] = std::move(graph);
  VLOG(4) << "-- Add a graph into CinnCompiler, which is:\n"
          << VizGraph(graph_key);
132 133 134 135 136 137
  return graph_key;
}

// Looks up a graph previously stored by AddGraph.
//
// \param graph_key  the key returned by AddGraph.
// \return a reference to the stored graph.
// Raises PreconditionNotMet, with the key rendered through ReadableKey, when
// no graph is stored under `graph_key`.
const Graph& CinnCompiler::FindGraph(const std::string& graph_key) const {
  PADDLE_ENFORCE_NE(
      graphs_.count(graph_key), 0,
      platform::errors::PreconditionNotMet(
          "Can not find the target graph, of which the key is:\n%s",
          ReadableKey(graph_key).c_str()));
  // The key is known to exist at this point, so at() does not throw.
  const auto& stored_graph = graphs_.at(graph_key);
  return *stored_graph;
}

144 145 146 147 148 149
std::string CinnCompiler::VizGraph(const std::string& graph_key) const {
  const Graph& graph = FindGraph(graph_key);
  return VizGraph(graph);
}

std::string CinnCompiler::VizGraph(const Graph& graph) const {
150 151 152 153 154 155 156 157 158 159 160
  Dot dot;
  std::unordered_map<const Node*, std::string> node2dot;
  int id = 0;
  // Create nodes
  for (const Node* n : graph.Nodes()) {
    std::string node_id = "Node" + std::to_string(id++);
    if (n->IsOp()) {
      dot.AddNode(
          node_id,
          {Dot::Attr("shape", "box"), Dot::Attr("style", "rounded,filled,bold"),
           Dot::Attr("color", "#303A3A"), Dot::Attr("fontcolor", "#ffffff")},
161
          n->Name(), true);
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
    } else if (n->IsVar()) {
      auto label = n->Name();
      if (n->Var() && n->Var()->GetType() == proto::VarType::LOD_TENSOR) {
        auto shape = n->Var()->GetShape();
        std::vector<std::string> shape_str(shape.size());
        std::transform(shape.begin(), shape.end(), shape_str.begin(),
                       [](const auto& val) { return std::to_string(val); });
        label += "\n" + string::join_strings(shape_str, ',');
      }
      dot.AddNode(
          node_id,
          {Dot::Attr("shape", "box"), Dot::Attr("style", "rounded,filled,bold"),
           Dot::Attr("color", n->Var()->IsParameter() ? "#148b97" : "#dddddd"),
           Dot::Attr("fontcolor",
                     n->Var()->IsParameter() ? "#ffffff" : "#000000")},
177
          label, true);
178 179 180 181 182 183 184 185 186 187
    }
    node2dot[n] = node_id;
  }
  // Create edges
  for (const Node* n : graph.Nodes()) {
    const auto& src_id = node2dot.at(n);
    for (auto* out : n->outputs) {
      const auto& dest_id = node2dot.at(out);
      dot.AddEdge(src_id, dest_id, {});
    }
188
  }
189
  return dot.Build();
190 191
}

192 193
std::string CinnCompiler::ReadableKey(
    const std::string& compilation_key) const {
194
  proto::ProgramDesc desc;
195
  desc.ParseFromString(compilation_key);
196 197 198 199 200 201 202
  return desc.DebugString();
}

void CinnCompiler::Clear() {
  {
    AutoWRLock guard{&rwlock_};
    graphs_.clear();
J
jiangcheng 已提交
203 204
    cache_by_address_.clear();
    cache_by_struct_.clear();
205
  }
H
Huihuang Zheng 已提交
206
  real_compiled_num_.store(0);
207 208 209 210 211
}

// Compiles `graph` with CINN and packages the result.
//
// Steps, in order:
//   1. CinnGraphSymbolization turns the graph into a CINN frontend program.
//   2. The "Decomposer" program pass and the "RemoveIdentity" frontend pass
//      are applied to that program.
//   3. An hlir graph is built from the program, the "OpFusion" pass is
//      applied to it, and a scope is built for it.
//   4. GraphCompiler::Build produces the runtime program for the fetch ids.
//   5. The compiler, runtime program, scope and variable-name map are stored
//      in a CinnCompiledObject, and a CinnLaunchContext is attached to it.
//
// \param graph          the subgraph to compile.
// \param input_tensors  input tensors by variable name.
// \param target         CINN target to compile for.
// \param compiled_num   sequence number of this compilation; passed to the
//                       symbolization and printed in the log.
// \param stream         opaque stream handle forwarded to Build.
// \return the newly created compiled object.
std::unique_ptr<CinnCompiledObject> CinnCompiler::CompileGraph(
    const ir::Graph& graph,
    const std::map<std::string, const LoDTensor*>& input_tensors,
    const Target& target, std::int64_t compiled_num, void* stream) const {
  // Paddle graph -> CINN frontend program, followed by the frontend passes.
  CinnGraphSymbolization symbolizer{compiled_num, graph, target,
                                    input_tensors};
  auto program = symbolizer();
  ProgramPass::Apply(&program, target, {"Decomposer"});
  auto fetch_ids = symbolizer.GetFetchIds();
  ::cinn::frontend::ApplyPass(&program, fetch_ids, "RemoveIdentity");

  // Frontend program -> hlir graph, logged before OpFusion is applied.
  auto hlir_graph =
      std::make_shared<::cinn::hlir::framework::Graph>(program, target);
  VLOG(1) << "-- The " << compiled_num << "-th compilation ("
          << target.arch_str() << "), and its related graph:\n"
          << hlir_graph->Visualize();
  ApplyPass(hlir_graph.get(), "OpFusion");
  auto scope = BuildScope(target, hlir_graph);

  // Logged here because fetch_ids is moved into Build() below.
  VLOG(4) << "All fetch var ids in CINN: "
          << string::join_strings(fetch_ids, ',');

  GraphCompiler::CompileOptions build_options;
  build_options.with_instantiate_variables = false;
  build_options.with_buffer_handle_instruction_inserted = true;

  auto compiler = std::make_unique<GraphCompiler>(target, scope, hlir_graph);
  auto build_result =
      compiler->Build(build_options, std::move(fetch_ids), stream);

  auto result = std::make_unique<CinnCompiledObject>();
  *result = {std::move(compiler), std::move(build_result.runtime_program),
             scope, symbolizer.var_model_to_program_map()};
  // The launch context is created from the fields assigned just above.
  result->launch_context =
      std::make_unique<operators::details::CinnLaunchContext>(
          result->paddle2cinn_varmap, result->scope);
  return result;
}

}  // namespace paddle2cinn
}  // namespace framework
}  // namespace paddle