diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc index eebc87f5d99880a43394e41f1631fb9671efa9e1..879c669bbbee6a0e090e8759dc8d0d9215ba7b92 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc @@ -188,7 +188,8 @@ class DeQuantizer final : public Quanter { bool IsNotPermittedName(const std::string& output_name) const override { std::unordered_map> block_list{ {"layer_norm", - {"Mean", "Variance"}}}; // not used in inference in MKLDNN + {"Mean", "Variance"}}, // not used in inference in MKLDNN + {"fc", {"ResidualData"}}}; // artifical output, already dequantized std::vector blocked_outputs{"XShape"}; // blocklist for any op auto op_name = op->Name(); diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 3b1c84db4c5346352850457324fe79ed63c66a60..04e77faf2e34f2909c618322612f5f60bcc5e4ba 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -348,6 +348,10 @@ void CpuPassStrategy::EnableMkldnnQuantizer() { void CpuPassStrategy::EnableMkldnnBfloat16() { #ifdef PADDLE_WITH_MKLDNN if (!use_mkldnn_bfloat16_) { + passes_.push_back("fc_mkldnn_pass"); + passes_.push_back("fc_act_mkldnn_fuse_pass"); + passes_.push_back("fc_elementwise_add_mkldnn_fuse_pass"); + passes_.push_back("cpu_bfloat16_placement_pass"); passes_.push_back("cpu_bfloat16_pass"); passes_.push_back("cpu_quantize_squash_pass");