Review notes (text placed before the first "diff --git" header is skipped by
patch tools):
  * Adds PaddlePassBuilder::GetPassIndex and uses it in
    CpuPassStrategy::EnableMKLDNN to remove gpu_cpu_reshape2_matmul_fuse_pass;
    the same pass is no longer applied by Quant2Int8MkldnnPass.
  * The "not found" result is static_cast<size_t>(-1) and is compared as
    size_t, so no size_t -> int narrowing is involved.
  * NOTE(review): in this copy of the patch the #include targets and the
    std::vector template argument are missing, and the leading whitespace of
    the hunk lines was reconstructed -- restore both from the original patch
    before applying. The blob hashes on the "index" lines predate these edits.

diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index afffaa4729555475e1cfb71dbca8cfcfbaca7bbc..59c56766e54c6a76e3d8747bf00cc58a37624b06 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -20,6 +20,7 @@
 #include
 #endif
 #include
+#include
 #include
 
 namespace paddle {
@@ -60,6 +61,12 @@ void PaddlePassBuilder::DeletePass(const std::string &pass_type) {
   }
 }
 
+size_t PaddlePassBuilder::GetPassIndex(const std::string &pass_type) {
+  auto iter = std::find(std::begin(passes_), std::end(passes_), pass_type);
+  if (iter == std::end(passes_)) return static_cast<size_t>(-1);
+  return static_cast<size_t>(std::distance(std::begin(passes_), iter));
+}
+
 void PaddlePassBuilder::InsertPass(size_t idx, const std::string &pass_type) {
   passes_.insert(std::begin(passes_) + idx, pass_type);
 }
@@ -268,7 +275,11 @@ void CpuPassStrategy::EnableMKLDNN() {
 #ifdef PADDLE_WITH_MKLDNN
   if (!use_mkldnn_) {
     passes_.insert(passes_.begin(), "mkldnn_placement_pass");
-
+    // this pass slows down FC mkldnn int8 operator
+    const size_t id = GetPassIndex("gpu_cpu_reshape2_matmul_fuse_pass");
+    if (id != static_cast<size_t>(-1)) {
+      passes_.erase(passes_.begin() + id);
+    }
     for (auto &pass : std::vector({
              "depthwise_conv_mkldnn_pass",    //
              "conv_bn_fuse_pass",  // Execute BN passes again to
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index 02290ed33ff1cd4f72d707d6f9d23f16e05c321b..40bec389295abad2c433c0a6c073f1240efdf896 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -75,6 +75,11 @@ class PD_INFER_DECL PaddlePassBuilder {
   /// \param[in] pass_type the certain pass type to be deleted.
   void DeletePass(const std::string &pass_type);
 
+  /// \brief Get the position of a pass in the pass list.
+  /// \param[in] pass_type the type of the pass to look up.
+  /// \return index of the first match, or size_t(-1) if not found.
+  size_t GetPassIndex(const std::string &pass_type);
+
   /// \brief Delete all the passes.
   void ClearPasses();
 
diff --git a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
index 9d9fbd39a5767ffe72ad579df2d31ac66eda2234..41816068c885af1eb4be932310ca1418c7721398 100644
--- a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
@@ -411,7 +411,7 @@ class Quant2Int8MkldnnPass(object):
         graph = self._apply_pass(graph, 'multi_gru_seq_fuse_pass')
         graph = self._apply_pass(graph, 'seq_concat_fc_fuse_pass')
         graph = self._apply_pass(graph, 'gpu_cpu_squeeze2_matmul_fuse_pass')
-        graph = self._apply_pass(graph, 'gpu_cpu_reshape2_matmul_fuse_pass')
+        # gpu_cpu_reshape2_matmul_fuse_pass is skipped: it slows down int8 FC.
         graph = self._apply_pass(graph, 'gpu_cpu_flatten2_matmul_fuse_pass')
         graph = self._apply_pass(graph, 'matmul_v2_scale_fuse_pass')
         graph = self._apply_pass(graph, 'squared_mat_sub_fuse_pass')