build_cinn_pass.h 4.1 KB
Newer Older
J
jiangcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

17 18 19 20 21 22
#include <algorithm>
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

J
jiangcheng 已提交
23
#include "paddle/fluid/framework/ir/pass.h"
24 25
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h"
J
jiangcheng 已提交
26 27 28

namespace paddle {
namespace framework {
29 30
namespace ir {
class MemOptVarInfo;
31
class Node;
32 33
}  // namespace ir

J
jiangcheng 已提交
34 35
namespace paddle2cinn {

36
// The string "cinn_launch". Per the BuildCinnPass description in this header,
// a compiled cluster consists of kCinnLaunchOp.
// NOTE(review): presumably the registered op type name - confirm at use sites.
constexpr char kCinnLaunchOp[] = "cinn_launch";
37 38 39 40
// String constants whose use sites are outside this header.
// NOTE(review): judging by the names, they presumably key the input,
// no-need-buffer feed, internal and output variable name sets of a cluster -
// confirm at the use sites.
constexpr char kInputVars[] = "InputVars";
constexpr char kNoNeedBufferFeeds[] = "NoNeedBufferFeeds";
constexpr char kInternalVars[] = "InternalVars";
constexpr char kOutputVars[] = "OutputVars";
41 42
// String constant whose use sites are outside this header.
// NOTE(review): presumably the key under which a Name2VarInfoMap taken from
// the main graph is stored - confirm at the use sites.
constexpr char kMemOptVarInfoFromMainGraph[] =
    "mem_opt_var_info_from_main_graph";
43

44 45 46
// Maps a string name to a shared_ptr of framework::ir::MemOptVarInfo.
// NOTE(review): the key is presumably a variable name - confirm at use sites.
using Name2VarInfoMap =
    std::unordered_map<std::string,
                       std::shared_ptr<framework::ir::MemOptVarInfo>>;
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
// Unordered set of ir::Node pointers; it is the type of the `cluster`
// parameter of OpTransInfo::GetDenyVarNames.
using GraphNodeSet = std::unordered_set<ir::Node*>;

// Bundles the string tables and per-op predicates declared below, plus
// GetDenyVarNames().
//
// NOTE(review): how each member is consumed is not visible in this header;
// the member comments describe only what the data itself encodes. The "deny"
// naming suggests these entries are excluded from CINN clusters - confirm at
// the use sites.
struct OpTransInfo {
  // The names "feed" and "fetch".
  const std::unordered_set<std::string> default_deny_ops{"feed", "fetch"};

  // Maps a string key (presumably an op type name) to a predicate over a graph
  // node. Only "slice" is registered: its predicate returns true when the node
  // is an op node and its "infer_flags" attribute (read as std::vector<int>)
  // contains at least one negative value, and false otherwise.
  const std::unordered_map<std::string, std::function<bool(const ir::Node*)>>
      dynamic_op_cond{
          {"slice", [](const ir::Node* node) -> bool {
             if (!node->IsOp()) {
               return false;
             }
             const auto infer_flags =
                 node->Op()->GetAttrIfExists<std::vector<int>>("infer_flags");
             // A single negative flag is enough to satisfy the condition.
             return std::any_of(infer_flags.begin(),
                                infer_flags.end(),
                                [](int flag) { return flag < 0; });
           }}};

  // Maps a string key (presumably an op type name) to a set of names; both
  // "batch_norm" and "batch_norm_grad" map to {"ReserveSpace"}.
  // NOTE(review): the inner names are presumably op parameter names - confirm.
  const std::unordered_map<std::string, std::unordered_set<std::string>>
      deny_param_cond{{"batch_norm", {"ReserveSpace"}},
                      {"batch_norm_grad", {"ReserveSpace"}}};

  // Returns a set of names computed from `cluster`. Declared here and defined
  // outside this header.
  // NOTE(review): presumably the variable names matched by deny_param_cond for
  // the ops in `cluster` - confirm against the definition.
  std::unordered_set<std::string> GetDenyVarNames(
      const GraphNodeSet& cluster) const;
};
J
jiangcheng 已提交
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94

// A pass named BuildCinnPass, the function of this pass is:
//
// a) Detect the subgraphs that can be compiled by the CINN compiler. We call a
// detected subgraph a cluster, which consists of several op nodes.
//
// b) Call the CINN compiler to compile each original cluster and get the
// compiled cluster, which consists of several kCinnLaunchOp.
//
// c) Replace the original cluster with corresponding compiled cluster on the
// original graph.
//
// In this pass, some questions are handled with caution:
//
// a) How to determine whether two op nodes can be divided into a cluster?
// Firstly, both op nodes should be supported for compilation.
// Secondly, there should be a direct path between the two op nodes through a
// var node.
// Thirdly, there should be no extra path between the two op nodes through
// unsupported op nodes.
// Lastly, if op nodes a and b can be divided into a cluster, and op nodes b
// and c can be divided into a cluster, then a and c can also be divided into
// a cluster.
// The implementation of cluster detection is encapsulated in the
// SubGraphDetector class.
J
jiangcheng 已提交
102 103 104 105 106 107 108 109 110 111 112 113 114 115
//
// b) How to deal with the links between the var nodes in global graph and the
// op nodes in a cluster?
// We first add links between the var nodes in global graph and the op nodes in
// the compiled cluster, and then remove useless links between the var nodes in
// global graph and the op nodes in the original cluster.
// Graph pass declared for the detect / compile / replace flow described in the
// comment above. Only the declaration lives in this header.
class BuildCinnPass : public framework::ir::Pass {
 protected:
  // Overrides the framework::ir::Pass hook; defined outside this header.
  // NOTE(review): presumably rewrites `graph` in place (the pointer is
  // non-const while the method itself is const) - confirm in the definition.
  void ApplyImpl(framework::ir::Graph* graph) const override;
};

}  // namespace paddle2cinn
}  // namespace framework
}  // namespace paddle