paddle_pass_builder.cc 21.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/api/paddle_pass_builder.h"
16 17 18
#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#endif
19 20 21
#ifdef PADDLE_WITH_HIP
#include <miopen/miopen.h>
#endif
L
Leo Chen 已提交
22 23 24
#ifdef PADDLE_WITH_TENSORRT
#include "paddle/fluid/inference/tensorrt/helper.h"
#endif
25

26
#include <glog/logging.h>
27

28
#include <algorithm>
29
#include <sstream>
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58

namespace paddle {

// Adds `pass_type` as the new last entry of the pass list.
void PaddlePassBuilder::AppendPass(const std::string &pass_type) {
  passes_.emplace_back(pass_type);
}

void PaddlePassBuilder::TurnOnDebug() {
  std::vector<std::string> passes;
  auto it = std::begin(passes_);
  while (it != std::end(passes_)) {
    if (*it != "graph_viz_pass") {
      it = passes_.insert(it + 1, "graph_viz_pass");
    } else {
      ++it;
    }
  }
}

std::string PaddlePassBuilder::DebugString() {
  std::stringstream ss;
  ss << "Passes to apply:\n";
  for (auto &pass : passes_) {
    ss << "  - " << pass << '\n';
  }
  return ss.str();
}

void PaddlePassBuilder::DeletePass(const std::string &pass_type) {
59
  deleted_passes_.insert(pass_type);
60 61 62 63 64 65 66 67 68 69
  auto it = std::begin(passes_);
  while (it != std::end(passes_)) {
    if (*it == pass_type) {
      it = passes_.erase(it);
    } else {
      ++it;
    }
  }
}

70 71 72 73 74 75
// Returns the position of the first entry in `passes_` equal to `pass_type`.
// When there is no such entry the result is -1 converted to size_t (i.e. the
// largest size_t value), so callers must compare against that sentinel.
size_t PaddlePassBuilder::GetPassIndex(const std::string &pass_type) {
  const auto first = std::begin(passes_);
  const auto last = std::end(passes_);
  const auto found = std::find(first, last, pass_type);
  if (found != last) {
    return std::distance(first, found);
  }
  return static_cast<size_t>(-1);
}

76 77 78 79 80 81 82 83
// Inserts `pass_type` so that it ends up at position `idx` of `passes_`.
// No bounds check is performed here; `idx` must not exceed the current size.
void PaddlePassBuilder::InsertPass(size_t idx, const std::string &pass_type) {
  const auto position = std::begin(passes_) + idx;
  passes_.insert(position, pass_type);
}

// Removes the entry at position `idx` of `passes_`.
// No bounds check is performed here; `idx` must refer to an existing entry.
void PaddlePassBuilder::DeletePass(size_t idx) {
  const auto position = std::begin(passes_) + idx;
  passes_.erase(position);
}

W
Wojciech Uss 已提交
84 85
// Appends `pass` to the end of `analysis_passes_`.
void PaddlePassBuilder::AppendAnalysisPass(const std::string &pass) {
  analysis_passes_.push_back(pass);
}

W
Wojciech Uss 已提交
88 89
// Empties `passes_`. Nothing else is touched by this function.
void PaddlePassBuilder::ClearPasses() {
  passes_.clear();
}

90
// Ordered pass names; judging by the identifier this is the pipeline used when
// the TensorRT subgraph engine is enabled (confirm against the users of this
// constant). The exact contents depend on the build: PADDLE_WITH_TENSORRT and
// the TensorRT version, _WIN32, and CUDNN_VERSION all gate entries below.
// "conv_bn_fuse_pass" is listed both before and after
// "tensorrt_subgraph_pass".
const std::vector<std::string> kTRTSubgraphPasses({
    "trt_support_nhwc_pass",                        //
    "adaptive_pool2d_convert_global_pass",          //
    "trt_map_ops_to_matrix_multiply_pass",          //
    "shuffle_channel_detect_pass",                  //
    "quant_conv2d_dequant_fuse_pass",               //
    "delete_quant_dequant_op_pass",                 //
    "delete_quant_dequant_filter_op_pass",          //
    "trt_delete_weight_dequant_linear_op_pass",     //
    "delete_quant_dequant_linear_op_pass",          //
    "identity_op_clean_pass",                       //
    "add_support_int8_pass",                        //
    "simplify_with_basic_ops_pass",                 //
    "trt_embedding_eltwise_layernorm_fuse_pass",    //
    "preln_embedding_eltwise_layernorm_fuse_pass",  //
    "trt_multihead_matmul_fuse_pass_v2",            //
    "trt_multihead_matmul_fuse_pass_v3",            //
    "multihead_matmul_roformer_fuse_pass",          //
    "constant_folding_pass",                        //
#ifdef PADDLE_WITH_TENSORRT
#if !IS_TRT_VERSION_GE(8610)
    "trt_flash_multihead_matmul_fuse_pass",  //
    "trt_cross_multihead_matmul_fuse_pass",  //
#endif
#endif
    "vit_attention_fuse_pass",              //
    "trt_qk_multihead_matmul_fuse_pass",    //
    "layernorm_shift_partition_fuse_pass",  //
    "merge_layernorm_fuse_pass",            //
#if !defined _WIN32
    "split_layernorm_to_math_ops_pass",  //
#endif
#if !defined _WIN32  // Windows CI is TensorRT7.0. Remove this after upgrading.
    "trt_skip_layernorm_fuse_pass",    //
    "preln_skip_layernorm_fuse_pass",  //
#endif
    "preln_residual_bias_fuse_pass",   //
    "preln_layernorm_x_fuse_pass",     //
    "reverse_roll_fuse_pass",          //
    "conv_bn_fuse_pass",               //
    "conv_elementwise_add_fuse_pass",  //
#if !defined _WIN32  // Windows CI is TensorRT7.0. Remove this after upgrading.
    "trans_layernorm_fuse_pass",  //
#endif
    "remove_padding_recover_padding_pass",         //
    "delete_remove_padding_recover_padding_pass",  //
    // "yolo_box_fuse_pass",      //
    "dense_fc_to_sparse_pass",                //
    "dense_multihead_matmul_to_sparse_pass",  //
#if !defined _WIN32  // Windows CI is TensorRT7.0. Remove this after upgrading.
    "elementwise_groupnorm_act_pass",        //
    "preln_elementwise_groupnorm_act_pass",  //
    "groupnorm_act_pass",                    //
    "elementwiseadd_transpose_pass",         //
#endif
    "tensorrt_subgraph_pass",  //
    "conv_bn_fuse_pass",       //
#if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
                           // guaranteed at least v7
// cudnn8.0 has memory leak problem in conv + eltwise + act, so we
// disable the pass.
#if !(CUDNN_VERSION >= 8000 && CUDNN_VERSION < 8100)
    "conv_elementwise_add_act_fuse_pass",   //
    "conv_elementwise_add2_act_fuse_pass",  //
#endif
#endif
    "transpose_flatten_concat_fuse_pass",  //
    "auto_mixed_precision_pass",
});

D
denglin-github 已提交
163 164
// Ordered pass names ending in "dlnne_subgraph_pass"; judging by the
// identifier this is the pipeline for the DLNNE subgraph engine (confirm
// against the users of this constant).
const std::vector<std::string> kDlnneSubgraphPasses({
    "is_test_pass",                  //
    "delete_dropout_op_pass",        //
    "simplify_with_basic_ops_pass",  //
    "conv_bn_fuse_pass",             //
    "depthwise_conv_bn_fuse_pass",   //
    "shuffle_channel_detect_pass",   //
    "dlnne_subgraph_pass",           //
});

石晓伟 已提交
173 174 175 176 177 178
// Holds the single entry "lite_subgraph_pass" when the build defines
// PADDLE_WITH_LITE; otherwise the list is empty.
const std::vector<std::string> kLiteSubgraphPasses{
#ifdef PADDLE_WITH_LITE
    "lite_subgraph_pass",
#endif
};

179 180 181 182
// TODO(inference): Most of the existing pass fusion operators do not
// support fp16/bf16 precision, temporarily use low precision pass to prevent
// running errors. After fusion operator supports low precision, delete this.
//
// Ordered pass names for that low-precision GPU path.
// "fc_elementwise_layernorm_fuse_pass" is deliberately left commented out.
const std::vector<std::string> kGpuLowerPrecisionPasses{
    "map_op_to_another_pass",
    "identity_op_clean_pass",
    "simplify_with_basic_ops_pass",
    "silu_fuse_pass",
    "delete_quant_dequant_linear_op_pass",
    "delete_weight_dequant_linear_op_pass",
    "conv_bn_fuse_pass",
    "conv_eltwiseadd_bn_fuse_pass",
    "conv_elementwise_add_act_fuse_pass",
    "conv_elementwise_add2_act_fuse_pass",
    "conv_elementwise_add_fuse_pass",
    "conv2d_fusion_layout_transfer_pass",
    "multihead_matmul_fuse_pass_v2",
    "fused_multi_transformer_encoder_pass",
    "fused_multi_transformer_decoder_pass",
    "fused_multi_transformer_encoder_fuse_qkv_pass",
    "fused_multi_transformer_decoder_fuse_qkv_pass",
    "multi_devices_fused_multi_transformer_encoder_pass",
    "multi_devices_fused_multi_transformer_encoder_fuse_qkv_pass",
    "multi_devices_fused_multi_transformer_decoder_fuse_qkv_pass",
    "fuse_multi_transformer_layer_pass",
    "gpu_cpu_map_matmul_v2_to_mul_pass",
    "gpu_cpu_map_matmul_v2_to_matmul_pass",
    "gpu_cpu_map_matmul_to_mul_pass",
    "fc_fuse_pass",
    // "fc_elementwise_layernorm_fuse_pass",
    "embedding_eltwise_layernorm_fuse_pass",
    "inplace_op_var_pass"};
211

212
// Ordered pass names ending in "tensorrt_subgraph_pass"; judging by the
// identifier this is the reduced TensorRT pipeline for low-precision runs
// (confirm against the users of this constant). The two conv/bn fusions are
// deliberately left commented out.
const std::vector<std::string> kTrtLowerPrecisionPasses{
    "simplify_with_basic_ops_pass",
    // "conv_bn_fuse_pass",
    // "conv_eltwiseadd_bn_fuse_pass",
    "trt_embedding_eltwise_layernorm_fuse_pass",
    "trt_skip_layernorm_fuse_pass",
    "tensorrt_subgraph_pass",
};

221 222 223 224 225 226 227
// Ordered pass names: three matmul-mapping passes followed by
// "build_cinn_pass".
const std::vector<std::string> kCINNCompilerPasses = {
    "gpu_cpu_map_matmul_v2_to_mul_pass",     //
    "gpu_cpu_map_matmul_v2_to_matmul_pass",  //
    "gpu_cpu_map_matmul_to_mul_pass",        //
    "build_cinn_pass",                       //
};

228 229
// Builds the default GPU pass list and sets `use_gpu_` to true.
//
// The conv + elementwise-add fusions are only compiled in for
// CUDNN_VERSION >= 7100, and the "..._act_fuse_pass" variants are additionally
// skipped for cuDNN 8.0.x (see the comments on the preprocessor guards).
GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
  passes_.assign({
    "map_op_to_another_pass",                                           //
        "is_test_pass",                                                 //
        "simplify_with_basic_ops_pass",                                 //
        "delete_quant_dequant_linear_op_pass",                          //
        "delete_weight_dequant_linear_op_pass",                         //
        "constant_folding_pass",                                        //
        "silu_fuse_pass",                                               //
        "conv_bn_fuse_pass",                                            //
        "conv_eltwiseadd_bn_fuse_pass",                                 //
        "embedding_eltwise_layernorm_fuse_pass",                        //
        "multihead_matmul_fuse_pass_v2",                                //
        "vit_attention_fuse_pass",                                      //
        "fused_multi_transformer_encoder_pass",                         //
        "fused_multi_transformer_decoder_pass",                         //
        "fused_multi_transformer_encoder_fuse_qkv_pass",                //
        "fused_multi_transformer_decoder_fuse_qkv_pass",                //
        "multi_devices_fused_multi_transformer_encoder_pass",           //
        "multi_devices_fused_multi_transformer_encoder_fuse_qkv_pass",  //
        "multi_devices_fused_multi_transformer_decoder_fuse_qkv_pass",  //
        "fuse_multi_transformer_layer_pass",                            //
        "gpu_cpu_squeeze2_matmul_fuse_pass",                            //
        "gpu_cpu_reshape2_matmul_fuse_pass",                            //
        "gpu_cpu_flatten2_matmul_fuse_pass",                            //
        "gpu_cpu_map_matmul_v2_to_mul_pass",                            //
        "gpu_cpu_map_matmul_v2_to_matmul_pass",                         //
        "matmul_scale_fuse_pass",                                       //
        "multihead_matmul_fuse_pass_v3",                                //
        "gpu_cpu_map_matmul_to_mul_pass",                               //
        "fc_fuse_pass",                                                 //
        "fc_elementwise_layernorm_fuse_pass",                           //
#if CUDNN_VERSION >= 7100  // To run conv_fusion, the version of cudnn must be
                           // guaranteed at least v7
// cudnn8.0 has memory leak problem in conv + eltwise + act, so we
// disable the pass.
#if !(CUDNN_VERSION >= 8000 && CUDNN_VERSION < 8100)
        "conv_elementwise_add_act_fuse_pass",   //
        "conv_elementwise_add2_act_fuse_pass",  //
#endif
        "conv_elementwise_add_fuse_pass",      //
#endif                                         //
        "transpose_flatten_concat_fuse_pass",  //
        "identity_op_clean_pass",              //
        "conv2d_fusion_layout_transfer_pass",  //
        "transfer_layout_elim_pass",
        "auto_mixed_precision_pass",  //
        "inplace_op_var_pass",        // should be the last pass.
  });

  use_gpu_ = true;
}

281 282 283 284 285 286 287
// Puts "cudnn_placement_pass" at the front of the pass list the first time it
// is called and sets `use_cudnn_`; later calls change nothing.
void GpuPassStrategy::EnableCUDNN() {
  if (use_cudnn_) {
    return;
  }
  passes_.insert(std::begin(passes_), "cudnn_placement_pass");
  use_cudnn_ = true;
}

W
Wojciech Uss 已提交
288 289
// Not available for the GPU strategy: only logs an error, no state changes.
void GpuPassStrategy::EnableMKLDNN() {
  LOG(ERROR) << "GPU not support MKLDNN yet";
}

W
Wojciech Uss 已提交
292 293
// Not available for the GPU strategy: only logs an error, no state changes.
void GpuPassStrategy::EnableMkldnnQuantizer() {
  LOG(ERROR) << "GPU not support MKL-DNN quantization";
}

296 297 298 299
void GpuPassStrategy::EnableMkldnnBfloat16() {
  LOG(ERROR) << "GPU not support MKL-DNN bfloat16";
}

B
baoachun 已提交
300 301 302 303
void GpuPassStrategy::EnableMkldnnInt8() {
  LOG(ERROR) << "GPU not support MKL-DNN int8";
}

P
Paulina Gacek 已提交
304 305 306 307
void GpuPassStrategy::DisableMkldnnFcPasses() {
  LOG(ERROR) << "GPU not support MKL-DNN fc";
}

308 309 310
// Builds the default CPU pass list and sets `use_gpu_` to false.
// Entries that are commented out below are intentionally excluded.
CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
  // NOTE the large fusions should be located in the front, so that they will
  // not be damaged by smaller ones.
  passes_.assign({"simplify_with_basic_ops_pass",  //
                  "layer_norm_fuse_pass",
                  "attention_lstm_fuse_pass",       //
                  "seqconv_eltadd_relu_fuse_pass",  //
                  // "seqpool_concat_fuse_pass",    //
                  "seqpool_cvm_concat_fuse_pass",  //
                  // "embedding_fc_lstm_fuse_pass", //
                  // TODO(wilber): fix correctness problem.
                  // "fc_lstm_fuse_pass",                    //
                  "mul_lstm_fuse_pass",                      //
                  "fc_gru_fuse_pass",                        //
                  "mul_gru_fuse_pass",                       //
                  "seq_concat_fc_fuse_pass",                 //
                  "gpu_cpu_squeeze2_matmul_fuse_pass",       //
                  "gpu_cpu_reshape2_matmul_fuse_pass",       //
                  "gpu_cpu_flatten2_matmul_fuse_pass",       //
                  "matmul_v2_scale_fuse_pass",               //
                  "gpu_cpu_map_matmul_v2_to_mul_pass",       //
                  "gpu_cpu_map_matmul_v2_to_matmul_pass",    //
                  "matmul_scale_fuse_pass",                  //
                  "gpu_cpu_map_matmul_to_mul_pass",          //
                  "fc_fuse_pass",                            //
                  "repeated_fc_relu_fuse_pass",              //
                  "squared_mat_sub_fuse_pass",               //
                  "conv_bn_fuse_pass",                       //
                  "conv_eltwiseadd_bn_fuse_pass",            //
                  "conv_transpose_bn_fuse_pass",             //
                  "conv_transpose_eltwiseadd_bn_fuse_pass",  //
                  "is_test_pass",                            //
                  "constant_folding_pass"});

  use_gpu_ = false;
}
W
Wojciech Uss 已提交
344

345 346
void CpuPassStrategy::EnableCUDNN() { LOG(ERROR) << "CPU not support cuDNN"; }

W
Wojciech Uss 已提交
347 348 349 350 351 352
// Enables the MKL-DNN (oneDNN) pass set.
//
// When built with PADDLE_WITH_MKLDNN: on the first call,
// "mkldnn_placement_pass" is inserted at the front of the pass list and the
// passes listed below are appended at the back, in order; `use_mkldnn_` is
// then set to true. Later calls add nothing.
// When built without PADDLE_WITH_MKLDNN: `use_mkldnn_` is set to false and the
// pass list is left untouched.
void CpuPassStrategy::EnableMKLDNN() {
// TODO(Superjomn) Consider the way to mix CPU with GPU.
#ifdef PADDLE_WITH_MKLDNN
  if (!use_mkldnn_) {
    passes_.insert(passes_.begin(), "mkldnn_placement_pass");

    for (auto &pass : std::vector<std::string>({
             "squeeze2_transpose2_onednn_fuse_pass",
             "depthwise_conv_mkldnn_pass",    //
             "conv_bn_fuse_pass",             // Execute BN passes again to
             "conv_eltwiseadd_bn_fuse_pass",  // preserve correct pass order
             "conv_affine_channel_mkldnn_fuse_pass",    //
             "conv_transpose_bn_fuse_pass",             //
             "conv_transpose_eltwiseadd_bn_fuse_pass",  //
             "conv_bias_mkldnn_fuse_pass",              //
             "conv_transpose_bias_mkldnn_fuse_pass",
             // TODO(baoachun): Need to support 5-dimensional input.
             // "conv3d_bias_mkldnn_fuse_pass",  //
             "conv_elementwise_add_mkldnn_fuse_pass",
             "conv_activation_mkldnn_fuse_pass",           //
             "scale_matmul_fuse_pass",                     //
             "reshape_transpose_matmul_mkldnn_fuse_pass",  //
             "matmul_transpose_reshape_mkldnn_fuse_pass",  //
             "matmul_elementwise_add_mkldnn_fuse_pass",    //
             "matmul_activation_mkldnn_fuse_pass",         //
             // NOTE(review): a comment here used to read "Disabled due to
             // topology-dependent speed-up", but the fc passes below are
             // active. They are the same three names that
             // EraseFcMkldnnPasses() removes.
             "fc_mkldnn_pass",
             "fc_act_mkldnn_fuse_pass",
             "fc_elementwise_add_mkldnn_fuse_pass",   //
             "self_attention_fuse_pass",              //
             "batch_norm_act_fuse_pass",              //
             "softplus_activation_onednn_fuse_pass",  //
             "shuffle_channel_mkldnn_detect_pass",    //
             "elementwise_act_onednn_fuse_pass",      //
             "operator_scale_onednn_fuse_pass",       //
             "operator_unsqueeze2_onednn_fuse_pass",  //
             "operator_reshape2_onednn_fuse_pass",    //
         })) {
      passes_.push_back(pass);
    }
  }
  use_mkldnn_ = true;
#else
  use_mkldnn_ = false;
#endif
}

// With PADDLE_WITH_MKLDNN: appends "cpu_quantize_placement_pass" on the first
// call and sets `use_mkldnn_quantizer_`; later calls change nothing.
// Without PADDLE_WITH_MKLDNN: clears `use_mkldnn_quantizer_`.
void CpuPassStrategy::EnableMkldnnQuantizer() {
#ifdef PADDLE_WITH_MKLDNN
  if (use_mkldnn_quantizer_) {
    return;
  }
  passes_.push_back("cpu_quantize_placement_pass");
  use_mkldnn_quantizer_ = true;
#else
  use_mkldnn_quantizer_ = false;
#endif
}

405 406
// With PADDLE_WITH_MKLDNN: on the first call, appends the three fc MKL-DNN
// passes followed by the bfloat16 placement/conversion passes and
// "cpu_quantize_squash_pass"; `use_mkldnn_bfloat16_` is then set to true.
// Without PADDLE_WITH_MKLDNN: sets `use_mkldnn_bfloat16_` to false.
//
// NOTE(review): the fc passes are appended without checking whether they are
// already in the list (EnableMKLDNN() appends the same names).
void CpuPassStrategy::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
  if (!use_mkldnn_bfloat16_) {
    passes_.push_back("fc_mkldnn_pass");
    passes_.push_back("fc_act_mkldnn_fuse_pass");
    passes_.push_back("fc_elementwise_add_mkldnn_fuse_pass");

    passes_.push_back("cpu_bfloat16_placement_pass");
    passes_.push_back("cpu_bfloat16_pass");
    passes_.push_back("cpu_quantize_squash_pass");
  }
  use_mkldnn_bfloat16_ = true;
#else
  use_mkldnn_bfloat16_ = false;
#endif
}

B
baoachun 已提交
422 423 424 425
// With PADDLE_WITH_MKLDNN: on the first call, discards the current pass list
// and rebuilds it from scratch with the int8 pipeline below;
// `use_mkldnn_int8_` is then set to true. Later calls leave the list alone.
// Without PADDLE_WITH_MKLDNN: sets `use_mkldnn_int8_` to false.
//
// "repeated_fc_relu_fuse_pass" is pushed twice (after the matmul mapping
// passes and again after "fc_fuse_pass"), as in the original sequence.
void CpuPassStrategy::EnableMkldnnInt8() {
#ifdef PADDLE_WITH_MKLDNN
  if (!use_mkldnn_int8_) {
    passes_.clear();
    passes_.push_back("simplify_with_basic_ops_pass");
    passes_.push_back("quant_dequant_mkldnn_pass");
    passes_.push_back("mkldnn_placement_pass");
    passes_.push_back("constant_folding_pass");
    passes_.push_back("squeeze2_transpose2_onednn_fuse_pass");
    passes_.push_back("layer_norm_fuse_pass");
    passes_.push_back("attention_lstm_fuse_pass");
    passes_.push_back("seqconv_eltadd_relu_fuse_pass");
    passes_.push_back("fc_lstm_fuse_pass");
    passes_.push_back("mul_lstm_fuse_pass");
    passes_.push_back("fc_gru_fuse_pass");
    passes_.push_back("mul_gru_fuse_pass");
    passes_.push_back("multi_gru_fuse_pass");
    passes_.push_back("multi_gru_seq_fuse_pass");
    passes_.push_back("seq_concat_fc_fuse_pass");
    passes_.push_back("gpu_cpu_squeeze2_matmul_fuse_pass");
    passes_.push_back("gpu_cpu_reshape2_matmul_fuse_pass");
    passes_.push_back("gpu_cpu_flatten2_matmul_fuse_pass");
    passes_.push_back("matmul_v2_scale_fuse_pass");
    passes_.push_back("squared_mat_sub_fuse_pass");
    passes_.push_back("is_test_pass");
    passes_.push_back("gpu_cpu_map_matmul_v2_to_mul_pass");
    passes_.push_back("gpu_cpu_map_matmul_v2_to_matmul_pass");
    passes_.push_back("matmul_scale_fuse_pass");
    passes_.push_back("gpu_cpu_map_matmul_to_mul_pass");
    passes_.push_back("repeated_fc_relu_fuse_pass");
    passes_.push_back("depthwise_conv_mkldnn_pass");
    passes_.push_back("conv_bn_fuse_pass");
    passes_.push_back("conv_eltwiseadd_bn_fuse_pass");
    passes_.push_back("conv_affine_channel_mkldnn_fuse_pass");
    passes_.push_back("conv_transpose_bn_fuse_pass");
    passes_.push_back("conv_transpose_eltwiseadd_bn_fuse_pass");
    passes_.push_back("conv_bias_mkldnn_fuse_pass");
    passes_.push_back("conv_transpose_bias_mkldnn_fuse_pass");
    passes_.push_back("conv_elementwise_add_mkldnn_fuse_pass");
    passes_.push_back("conv_activation_mkldnn_fuse_pass");
    passes_.push_back("fc_fuse_pass");
    passes_.push_back("repeated_fc_relu_fuse_pass");
    passes_.push_back("fc_mkldnn_pass");
    passes_.push_back("fc_act_mkldnn_fuse_pass");
    passes_.push_back("fc_elementwise_add_mkldnn_fuse_pass");
    passes_.push_back("matmul_transpose_reshape_mkldnn_fuse_pass");
    passes_.push_back("batch_norm_act_fuse_pass");
    passes_.push_back("softplus_activation_onednn_fuse_pass");
    passes_.push_back("compute_propagate_scales_mkldnn_pass");
    passes_.push_back("scale_matmul_fuse_pass");
    passes_.push_back("reshape_transpose_matmul_mkldnn_fuse_pass");
    passes_.push_back("matmul_elementwise_add_mkldnn_fuse_pass");
    passes_.push_back("operator_scale_onednn_fuse_pass");
    passes_.push_back("operator_unsqueeze2_onednn_fuse_pass");
    passes_.push_back("operator_reshape2_onednn_fuse_pass");
    passes_.push_back("cpu_quantize_placement_pass");
    passes_.push_back("cpu_quantize_pass");
    passes_.push_back("cpu_quantize_squash_pass");
    passes_.push_back("quant_transpose2_dequant_onednn_fuse_pass");
  }
  use_mkldnn_int8_ = true;
#else
  use_mkldnn_int8_ = false;
#endif
}

P
Paulina Gacek 已提交
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511
// With PADDLE_WITH_MKLDNN: calls EraseFcMkldnnPasses() on the first call and
// sets `disable_mkldnn_fc_passes_`; later calls change nothing.
// Without PADDLE_WITH_MKLDNN: clears `disable_mkldnn_fc_passes_`.
void CpuPassStrategy::DisableMkldnnFcPasses() {
#ifdef PADDLE_WITH_MKLDNN
  if (disable_mkldnn_fc_passes_) {
    return;
  }
  EraseFcMkldnnPasses();
  disable_mkldnn_fc_passes_ = true;
#else
  disable_mkldnn_fc_passes_ = false;
#endif
}

// Removes the three fc MKL-DNN passes from `passes_`, if present.
//
// NOTE(review): only the first occurrence of each name is erased. If a name
// was appended more than once (EnableMKLDNN() and EnableMkldnnBfloat16() both
// push these passes), later copies stay in the list — confirm this is intended.
void CpuPassStrategy::EraseFcMkldnnPasses() {
  const std::vector<std::string> fc_passes_to_erase(
      {"fc_mkldnn_pass",
       "fc_act_mkldnn_fuse_pass",
       "fc_elementwise_add_mkldnn_fuse_pass"});
  // GetPassIndex() returns size_t and reports "not found" as -1 converted to
  // size_t. Keep the comparison in size_t rather than narrowing to int.
  const size_t not_found = static_cast<size_t>(-1);
  for (const auto &pass : fc_passes_to_erase) {
    const size_t idx = GetPassIndex(pass);
    if (idx != not_found) {
      passes_.erase(std::begin(passes_) + idx);
    }
  }
}

512 513
// Builds the default XPU pass list and sets `use_xpu_` to true.
// "auto_mixed_precision_pass" is intentionally left commented out.
XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) {
  passes_.assign({
      "delete_assign_op_pass",
      "delete_dropout_op_pass",
      "delete_concat_op_pass",
      "delete_repeated_ops_pass",
      "identity_op_clean_pass",
      "fused_continuous_same_ops_pass",
      "reshape_unstack_concat_fuse_pass",
      "delete_op_device_pass",
      "constant_folding_pass",
      "delete_elementwise_mul_op_pass",
      "generate_sequence_xpu_fuse_pass",
      "embedding_with_eltwise_add_xpu_fuse_pass",
      "multi_encoder_xpu_fuse_pass",
      "multi_encoder_xpu_adaptive_seqlen_fuse_pass",
      "multi_encoder_xpu_slice_fuse_pass",
      "fused_multi_transformer_cachekv_layout_trans_pass",
      "one_beam_size_fuse_pass",
      "fold_interp_outsize_fuse_pass",
      "fold_two_squeeze2_fuse_pass",
      "redundant_onnx_ops_elimination_pass",
      "reduce_ops_fuse_pass",
      "delete_cast_op_pass",
      "xpu_delete_cast_op_pass",
      "stack_fuse_pass",
      "fused_multi_transformer_xpu_pass",
      "relu6_fuse_pass",
      "sigmoid_elementmul_fuse_pass",
      "layer_norm_fuse_pass",
      "matmul_weight_trans_pass",
      "map_matmulv2_to_matmul_xpu_pass",
      "reshape2_matmul_xpu_fuse_pass",
      "squeeze2_matmul_xpu_fuse_pass",
      "redundant_squeeze_unsqueeze_elimination_pass",
      "fc_xpu_fuse_pass",
      "conv2d_xpu_fuse_pass",
      "conv2d_transpose_xpu_fuse_pass",
      "add_activation_xpu_fuse_pass",
      "add_layernorm_xpu_fuse_pass",
      "yolo_box_xpu_fuse_pass",
      "link_xpu_op_max_pass",
      "delete_isolated_node_pass",
      // "auto_mixed_precision_pass",
      "cast_mixed_precision_op_fuse_pass",
      "inplace_op_var_pass",
  });
  use_xpu_ = true;
}

J
jianghaicheng 已提交
562 563 564 565
// Builds the IPU pass list, which consists of the single entry
// "inference_process_pass".
IpuPassStrategy::IpuPassStrategy() : PassStrategy({}) {
  passes_.assign(1, "inference_process_pass");
}

566
}  // namespace paddle