diff --git a/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.cc b/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.cc
index a767d277e76890d9d5fa5f837779d9ce14bb41a1..3ef456b52bf00a415b8dda77a570ed5aaaef1dfc 100644
--- a/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.cc
+++ b/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.cc
@@ -99,15 +99,12 @@ void QuantDequantOpFuser::InsertNewNode(SSAGraph* graph,
   const int kNumFields = 5;
   const int kQuantizedWeightOffset = 0;
   const int kQuantizedOpOffset = 1;
-  const int kQuantizedOpOutOffset = 2;
   const int kDequantOpOffset = 3;
   const int kDequantOpOutOffset = 4;
 
   auto* quant_op_input = matched.at("quant_op_input");
   auto* quant_op_in_scale = matched.at("quant_op_in_scale");
   auto* quant_op = matched.at("quant_op");
-  auto* quant_op_out_scale = matched.at("quant_op_out_scale");
-  auto* quant_op_out = matched.at("quant_op_out");
 
   std::vector<Node*> nodes;
   for (int i = 0; i < times_; i++) {
diff --git a/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h b/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h
index be084eaf804a65781e13a44879c9bcd88a1363db..b4778aab182abf368461984bbfb9ef827b6c0fb9 100644
--- a/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h
+++ b/paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h
@@ -23,6 +23,18 @@ namespace lite {
 namespace mir {
 namespace fusion {
 
+/* The model trained by fluid quantization is a simulation of real int8
+ * computation. The quantized ops (conv2d, mul, depthwise_conv2d, etc.) have
+ * a fake_quant op in front of them and a fake_dequant op behind them.
+ *
+ * When in int8 mode, the pattern
+ *   "fake_quant + quantized_op + fake_dequant"
+ * can be detected by this fuser. The fuser extracts the input_scale and the
+ * weight_scale info from the fake_quant and fake_dequant ops and fuses them
+ * into the quantized_op.
+ * In addition, the fuser deletes the fake_quant and fake_dequant ops from
+ * the graph at the end.
+ */
 class QuantDequantOpFuser : public FuseBase {
  public:
   explicit QuantDequantOpFuser(const std::string& op_type,
diff --git a/paddle/fluid/lite/core/optimizer.h b/paddle/fluid/lite/core/optimizer.h
index bbe7f0a70a63a5a6d4b2e7fd1a397722e17a1bd1..2bfd4f77b656aec8461d845daf810fb5eba46ac0 100644
--- a/paddle/fluid/lite/core/optimizer.h
+++ b/paddle/fluid/lite/core/optimizer.h
@@ -48,22 +48,22 @@ class Optimizer {
 
     if (passes.empty()) {
       RunPasses(std::vector<std::string>{{
-          "lite_quant_dequant_fuse_pass", //
-          "lite_conv_bn_fuse_pass", //
+          "lite_quant_dequant_fuse_pass",                    //
+          "lite_conv_bn_fuse_pass",                          //
           "lite_conv_elementwise_add_activation_fuse_pass",  //
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
           "lite_elementwise_add_activation_fuse_pass",  //
 #endif
-          "lite_fc_fuse_pass", //
-          "static_kernel_pick_pass", //
-          "variable_place_inference_pass", //
-          "argument_type_display_pass", //
-          "type_target_transform_pass", //
-          "variable_place_inference_pass", //
-          "argument_type_display_pass", //
-          "io_copy_kernel_pick_pass", //
-          "variable_place_inference_pass", //
-          "runtime_context_assign_pass", //
+          "lite_fc_fuse_pass",                               //
+          "static_kernel_pick_pass",                         //
+          "variable_place_inference_pass",                   //
+          "argument_type_display_pass",                      //
+          "type_target_transform_pass",                      //
+          "variable_place_inference_pass",                   //
+          "argument_type_display_pass",                      //
+          "io_copy_kernel_pick_pass",                        //
+          "variable_place_inference_pass",                   //
+          "runtime_context_assign_pass",                     //
       }});
     } else {
       RunPasses(passes);
diff --git a/paddle/fluid/lite/operators/fake_dequantize_max_abs.h b/paddle/fluid/lite/operators/fake_dequantize_max_abs.h
index 4df7215ff061e4ba14732ff8507fbcf6eb3cb0fe..de48c413041e65740843b821e687908ad323f052 100644
--- a/paddle/fluid/lite/operators/fake_dequantize_max_abs.h
+++ b/paddle/fluid/lite/operators/fake_dequantize_max_abs.h
@@ -33,9 +33,9 @@ class FakeDequantizeMaxAbsOpLite : public OpLite {
 
   explicit FakeDequantizeMaxAbsOpLite(const std::string &type) : OpLite(type) {}
 
-  bool CheckShape() const override {}
+  bool CheckShape() const override { return true; }
 
-  bool InferShape() const override {}
+  bool InferShape() const override { return true; }
 
   bool AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) override {
     auto x = op_desc.Input("X").front();
diff --git a/paddle/fluid/lite/operators/fake_quantize_moving_avg_max_abs.h b/paddle/fluid/lite/operators/fake_quantize_moving_avg_max_abs.h
index 1db4f3bf62064ef38c654d557b6f986d0d806fd6..547584e165134596588d411dbfa3e956d63b8b46 100644
--- a/paddle/fluid/lite/operators/fake_quantize_moving_avg_max_abs.h
+++ b/paddle/fluid/lite/operators/fake_quantize_moving_avg_max_abs.h
@@ -34,9 +34,9 @@ class FakeQuantizeMovingAvgMaxAbsOpLite : public OpLite {
   explicit FakeQuantizeMovingAvgMaxAbsOpLite(const std::string &type)
       : OpLite(type) {}
 
-  bool CheckShape() const override {}
+  bool CheckShape() const override { return true; }
 
-  bool InferShape() const override {}
+  bool InferShape() const override { return true; }
 
   bool AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) override {
     auto x = op_desc.Input("X").front();