ir_pass_manager.cc 13.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/analysis/ir_pass_manager.h"
16
#include <map>
17
#include <memory>
18
#include <string>
19
#include <unordered_map>
20 21
#include <unordered_set>
#include <utility>
L
luotao1 已提交
22
#include <vector>
Y
Yan Chunwei 已提交
23
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
24 25
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/scope.h"
26
#include "paddle/fluid/inference/analysis/argument.h"
Y
Yan Chunwei 已提交
27
#include "paddle/fluid/string/pretty_log.h"
28 29 30 31

namespace paddle {
namespace inference {
namespace analysis {
Y
Yan Chunwei 已提交
32 33 34
using string::PrettyLogEndl;
using string::PrettyLog;
using string::Style;
35

36 37 38 39
/// Builds the manager from an analysis Argument.
///
/// Wraps the argument's main program in a freshly allocated Graph, registers
/// the parameter scope on that graph when the argument carries a "scope"
/// field, and then instantiates the passes listed in `ir_analysis_passes`.
///
/// @param argument Analysis argument; `main_program` and
///                 `ir_analysis_passes` are checked via ARGUMENT_CHECK_FIELD.
IRPassManager::IRPassManager(Argument *argument) {
  ARGUMENT_CHECK_FIELD(argument, main_program);
  graph_.reset(new Graph(argument->main_program()));

  // The scope is optional; it is attached through SetNotOwned, so the graph
  // only refers to it.
  const bool has_scope = argument->Has("scope");
  if (has_scope) {
    auto *scope_ptr = argument->scope_ptr();
    PADDLE_ENFORCE_NOT_NULL(scope_ptr,
                            platform::errors::PreconditionNotMet(
                                "The scope ptr should not be nullptr."));
    graph_->SetNotOwned(framework::ir::kParamScopeAttr, scope_ptr);
  }

  ARGUMENT_CHECK_FIELD(argument, ir_analysis_passes);
  const auto &pass_names = argument->ir_analysis_passes();
  CreatePasses(argument, pass_names);
}

51 52
/// Instantiates every pass named in `passes`, in order, configures it from
/// `argument`, and appends it to `passes_`.
///
/// Each pass is fetched from framework::ir::PassRegistry by name. Passes with
/// a dedicated branch below receive their attributes through `pass->Set(...)`;
/// every pass additionally receives a "disable_logs" attribute.
///
/// NOTE(review): attributes are handed over as raw `new` pointers; presumably
/// `Set` takes ownership of them (in contrast to `SetNotOwned`) -- confirm
/// against framework::ir::Pass.
///
/// @param argument Source of all configuration values.
/// @param passes   Names of the passes to create, in execution order.
void IRPassManager::CreatePasses(Argument *argument,
                                 const std::vector<std::string> &passes) {
  // Name of the pass handled in the previous iteration; used to label the
  // graph dump produced by a following graph_viz_pass.
  std::string pre_pass;
  // Number of graph_viz_pass instances seen so far; prefixes the dump name.
  int pass_num = 0;
  for (const std::string &pass_name : passes) {
    auto pass = framework::ir::PassRegistry::Instance().Get(pass_name);

    if (pass_name == "graph_viz_pass") {
      std::string optim_cache_dir = argument->optim_cache_dir();
      // "<index>_ir_<previous pass | origin>.dot", placed under the cache
      // directory when one is configured.
      std::string dot_file_path = std::to_string(pass_num) + "_ir_" +
                                  (pre_pass.empty() ? "origin" : pre_pass) +
                                  ".dot";
      if (!optim_cache_dir.empty()) {
        dot_file_path = optim_cache_dir + "/" + dot_file_path;
      }
      pass->Set("graph_viz_path", new std::string(std::move(dot_file_path)));
      pass->Set("optim_cache_dir", new std::string(std::move(optim_cache_dir)));
      pass_num++;
    } else if (pass_name == "mkldnn_placement_pass") {
      pass->Set("mkldnn_enabled_op_types",
                new std::unordered_set<std::string>(
                    argument->mkldnn_enabled_op_types()));
    } else if (pass_name == "cudnn_placement_pass") {
      // Always an empty set here.
      pass->Set("cudnn_enabled_op_types",
                new std::unordered_set<std::string>());
#ifdef PADDLE_WITH_MKLDNN
    } else if (pass_name == "cpu_quantize_placement_pass") {
      pass->Set("quantize_enabled_op_types",
                new std::unordered_set<std::string>(
                    argument->quantize_enabled_op_types()));
      pass->Set(
          "quantize_excluded_op_ids",
          new std::unordered_set<int>(argument->quantize_excluded_op_ids()));
    } else if (pass_name == "cpu_quantize_pass") {
      pass->Set("quant_var_scales",
                new VarQuantScale(argument->quant_var_scales()));
    } else if (pass_name == "cpu_bfloat16_placement_pass") {
      pass->Set("bfloat16_enabled_op_types",
                new std::unordered_set<std::string>(
                    argument->bfloat16_enabled_op_types()));
#endif
    } else if (pass_name == "tensorrt_subgraph_pass") {
      pass->Set("workspace_size", new int(argument->tensorrt_workspace_size()));
      pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
      pass->Set("min_subgraph_size",
                new int(argument->tensorrt_min_subgraph_size()));
      pass->Set("program",
                new framework::ProgramDesc *(&argument->main_program()));

      auto precision_mode = argument->tensorrt_precision_mode();
      bool enable_int8 = precision_mode == AnalysisConfig::Precision::kInt8;

      pass->Set("predictor_id", new int(argument->predictor_id()));
      bool use_calib_mode = argument->tensorrt_use_calib_mode();
      pass->Set("enable_int8", new bool(enable_int8));
      pass->Set("use_calib_mode", new bool(use_calib_mode));
      pass->Set("use_oss", new bool(argument->tensorrt_use_oss()));
      pass->Set("with_interleaved",
                new bool(argument->tensorrt_with_interleaved()));
      pass->Set("precision_mode",
                new AnalysisConfig::Precision(precision_mode));

      bool use_static_engine = argument->tensorrt_use_static_engine();
      bool model_from_memory = argument->model_from_memory();
      std::string optim_cache_dir = argument->optim_cache_dir();
      // INT8 calibration with an in-memory model requires an explicit cache
      // directory.
      bool int8_valid = !(model_from_memory && optim_cache_dir.empty() &&
                          enable_int8 && use_calib_mode);
      PADDLE_ENFORCE_EQ(
          int8_valid, true,
          platform::errors::PreconditionNotMet(
              "When you are in TRT INT8 mode, and load model from "
              "memory, you should set optim_cache_dir using "
              "config.SetOptimCacheDir()"));
      // The same requirement applies to a static engine with an in-memory
      // model.
      if (model_from_memory && use_static_engine) {
        PADDLE_ENFORCE_EQ(
            optim_cache_dir.empty(), false,
            platform::errors::PreconditionNotMet(
                "When you are using Paddle-TRT, and using load model "
                "from memory, and also set the use_static to true. "
                "you must set optim_cache_dir using "
                "config.SetOptimCacheDir()."));
      }

      if (!optim_cache_dir.empty()) {
        // Use the configured cache directory, creating it when missing.
        if (!PathExists(optim_cache_dir)) {
          PADDLE_ENFORCE_NE(
              MKDIR(optim_cache_dir.c_str()), -1,
              platform::errors::PreconditionNotMet(
                  "Can not create optimize cache directory: %s, Make sure you "
                  "have permission to write",
                  optim_cache_dir));
        }
        pass->Set("model_opt_cache_dir", new std::string(optim_cache_dir));
      } else if (use_static_engine || enable_int8) {
        // No cache directory configured: derive one from the model location.
        std::string model_opt_cache_dir =
            argument->Has("model_dir")
                ? argument->model_dir()
                : GetDirRoot(argument->model_program_path());
        pass->Set(
            "model_opt_cache_dir",
            new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir)));
      }
      pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
      pass->Set("use_static_engine", new bool(use_static_engine));
      // Reuse the value fetched above instead of querying the argument again.
      pass->Set("model_from_memory", new bool(model_from_memory));
      pass->Set("use_inspector", new bool(argument->tensorrt_use_inspector()));

      // tuned trt dynamic_shape
      pass->Set("trt_shape_range_info_path",
                new std::string(argument->tensorrt_shape_range_info_path()));
      pass->Set("trt_tuned_dynamic_shape",
                new bool(argument->tensorrt_tuned_dynamic_shape()));
      pass->Set("trt_allow_build_at_runtime",
                new bool(argument->tensorrt_allow_build_at_runtime()));
      pass->Set("max_input_shape", new std::map<std::string, std::vector<int>>(
                                       argument->max_input_shape()));
      pass->Set("min_input_shape", new std::map<std::string, std::vector<int>>(
                                       argument->min_input_shape()));
      pass->Set("optim_input_shape",
                new std::map<std::string, std::vector<int>>(
                    argument->optim_input_shape()));
      // Dynamic shape is on when all three shape maps are non-empty, or when
      // tuned dynamic shape is requested.
      bool with_dynamic_shape = (argument->max_input_shape().size() > 0 &&
                                 argument->min_input_shape().size() > 0 &&
                                 argument->optim_input_shape().size() > 0) ||
                                argument->tensorrt_tuned_dynamic_shape();
      pass->Set("with_dynamic_shape", new bool(with_dynamic_shape));
      pass->Set("trt_disabled_ops", new std::vector<std::string>(
                                        argument->tensorrt_disabled_ops()));
      pass->Set("trt_use_dla", new bool(argument->tensorrt_use_dla()));
      pass->Set("trt_dla_core", new int(argument->tensorrt_dla_core()));
      // Setting the disable_trt_plugin_fp16 to true means that TRT plugin will
      // not run fp16.
      pass->Set("disable_trt_plugin_fp16",
                new bool(argument->disable_trt_plugin_fp16()));
    } else if (pass_name == "dlnne_subgraph_pass") {
      pass->Set("min_subgraph_size",
                new int(argument->dlnne_min_subgraph_size()));
      pass->Set("program",
                new framework::ProgramDesc *(&argument->main_program()));
    }
    if (pass_name == "lite_subgraph_pass") {
      bool enable_int8 =
          argument->lite_precision_mode() == AnalysisConfig::Precision::kInt8;
      pass->Set("program",
                new framework::ProgramDesc *(&argument->main_program()));
      pass->Set("lite_ops_filter",
                new std::vector<std::string>(argument->lite_ops_filter()));
      pass->Set("predictor_id", new int(argument->predictor_id()));
      pass->Set("enable_int8", new bool(enable_int8));
      pass->Set("use_gpu", new bool(argument->use_gpu()));
      pass->Set("zero_copy", new bool(argument->lite_zero_copy()));
      pass->Set("use_xpu", new bool(argument->use_xpu()));
      pass->Set("xpu_l3_workspace_size",
                new int(argument->xpu_l3_workspace_size()));
      pass->Set("cpu_math_library_num_threads",
                new int(argument->cpu_math_library_num_threads()));
      pass->Set("locked", new bool(argument->xpu_locked()));
      pass->Set("autotune", new bool(argument->xpu_autotune()));
      pass->Set("autotune_file",
                new std::string(argument->xpu_autotune_file()));
      pass->Set("precision", new std::string(argument->xpu_precision()));
      pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
      pass->Set("xpu_device_id", new int(argument->xpu_device_id()));
      // NNAdapter Related
      pass->Set("use_nnadapter", new bool(argument->use_nnadapter()));
      pass->Set("nnadapter_model_cache_dir",
                new std::string(argument->nnadapter_model_cache_dir()));
      pass->Set(
          "nnadapter_device_names",
          new std::vector<std::string>(argument->nnadapter_device_names()));
      pass->Set("nnadapter_context_properties",
                new std::string(argument->nnadapter_context_properties()));
      pass->Set("nnadapter_subgraph_partition_config_buffer",
                new std::string(
                    argument->nnadapter_subgraph_partition_config_buffer()));
      pass->Set("nnadapter_subgraph_partition_config_path",
                new std::string(
                    argument->nnadapter_subgraph_partition_config_path()));
      pass->Set("nnadapter_model_cache_buffer",
                new std::vector<std::vector<char>>(
                    argument->nnadapter_model_cache_buffer()));
      pass->Set("nnadapter_model_cache_token",
                new std::vector<std::string>(
                    argument->nnadapter_model_cache_token()));
    }
    // Cached on the manager as well; Apply() consults it before logging.
    disable_logs_ = argument->disable_logs();
    if (pass_name == "fc_fuse_pass") {
      pass->Set("use_gpu", new bool(argument->use_gpu()));
      // FC padding is turned off whenever fc_mkldnn_pass is part of the
      // pipeline.
      bool fc_mkldnn_pass = false;
      for (const std::string &pass_n : passes) {
        if (pass_n == "fc_mkldnn_pass") {
          fc_mkldnn_pass = true;
          break;
        }
      }
      bool use_fc_padding = !fc_mkldnn_pass && argument->use_fc_padding();
      pass->Set("use_fc_padding", new bool(use_fc_padding));
    }

    pass->Set("disable_logs", new bool(disable_logs_));

    pre_pass = pass_name;

    passes_.emplace_back(std::move(pass));
  }
}

260 261 262 263
std::unique_ptr<Graph> IRPassManager::Apply(std::unique_ptr<Graph> graph) {
  if (passes_.empty()) {
    return graph;
  }
264 265
  PADDLE_ENFORCE_NOT_NULL(graph.get(), platform::errors::PreconditionNotMet(
                                           "Graph cannot be NULL."));
266 267
  // Apply all the passes
  for (const auto &pass : passes_) {
268
    if (pass->Type() != "graph_viz_pass" && !disable_logs_) {
Y
Yan Chunwei 已提交
269 270
      PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass->Type());
    }
271
    graph.reset(pass->Apply(graph.release()));
272
  }
G
Gabor Buella 已提交
273
  return graph;
274 275 276
}

/// Converts `*graph` back into a program description using
/// "graph_to_program_pass" and returns the resulting protobuf message by
/// value.
///
/// @param graph   In/out: released to the pass, then reset to the pointer the
///                pass returns.
/// @param program Program whose proto seeds the description the pass writes
///                into.
/// @return A copy of the proto held by the local program description.
framework::proto::ProgramDesc IRPassManager::AcquireProgram(
    std::unique_ptr<Graph> *graph, ProgramDesc *program) const {
  auto to_program_pass =
      framework::ir::PassRegistry::Instance().Get("graph_to_program_pass");

  // Direct using ProgramDesc desc(argument->main_program()) may cause
  // incomplete copies of information, so start from an empty description and
  // copy the proto explicitly.
  ProgramDesc program_copy;
  program_copy.CopyFrom(*program->Proto());

  // The pass only refers to the local description (SetNotOwned).
  to_program_pass->SetNotOwned("program", &program_copy);
  graph->reset(to_program_pass->Apply(graph->release()));

  return *program_copy.Proto();
}

291 292 293
}  // namespace analysis
}  // namespace inference
}  // namespace paddle