Add one pattern to remove the quantize->dequantize pairs for the floating-point constants

The existence of this pattern indicates that the user op doesn't have sufficient quantization parameters to be quantized, so the floating-point constants should be kept as float.

PiperOrigin-RevId: 295063207
Change-Id: I759ec06bcda8d15c27834b890dfd41fba3e08d17

Add one pattern to remove the quantize->dequantize pairs for the floating-point constants
The existence of this pattern indicates that the user op doesn't have sufficient quantization parameters to be quantized, so the floating-point constants should be kept as float.

PiperOrigin-RevId: 295063207
Change-Id: I759ec06bcda8d15c27834b890dfd41fba3e08d17
8275e9b3 · Feng Liu · TensorFlower Gardener · 0b772099 · 8275e9b3 · 8275e9b3
3 changed files
--- a/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h
+++ b/tensorflow/compiler/mlir/lite/quantization/quantization_utils.h
@@ -150,7 +150,8 @@ struct QuantizationPattern : public RewritePattern {

  explicit QuantizationPattern(MLIRContext* context, bool enable_verify,
                               float error_tolerance, bool single_layer_verify)
-      : RewritePattern(DQ::getOperationName(), 1, context),
+      // Set the score to a large number so it is always preferred.
+      : RewritePattern(DQ::getOperationName(), 300, context),
        enable_verify(enable_verify),
        error_tolerance(error_tolerance),
        single_layer_verify(single_layer_verify) {}

--- a/tensorflow/compiler/mlir/lite/tests/quantize.mlir
+++ b/tensorflow/compiler/mlir/lite/tests/quantize.mlir
@@ -2,39 +2,44 @@
 // RUN: tf-opt %s -tfl-prepare-quantize -tfl-quantize -tfl-numeric-verify | FileCheck --check-prefix=DEBUG %s

 // CHECK-LABEL: QuantizeFloatConst
-func @QuantizeFloatConst() -> tensor<f32> {
+func @QuantizeFloatConst() -> tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>> {
  %0 = constant dense<-0.1> : tensor<2x2xf32>
-  %1 = "tfl.quantize"(%0) {qtype = tensor<!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>} : (tensor<2x2xf32>) -> tensor<!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>
-  %2 = "tfl.dequantize"(%1) : (tensor<!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>) -> tensor<f32>
-  return %2 : tensor<f32>
+  %1 = "tfl.quantize"(%0) {qtype = tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>
+  return %1 : tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>

-// CHECK:  %[[cst:.*]] = "tfl.pseudo_qconst"() {qtype = tensor<!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>, value = dense<0> : tensor<2x2xi8>}
-// CHECK:  %[[dq:.*]] = "tfl.dequantize"(%[[cst]])
-// CHECK:  return %[[dq]] : tensor<f32>
+// CHECK:  %[[cst:.*]] = "tfl.pseudo_qconst"() {qtype = tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>, value = dense<0> : tensor<2x2xi8>}
+// CHECK:  return %[[cst]]
 }

 // CHECK-LABEL: QuantizeDenseFloatConst
-func @QuantizeDenseFloatConst() -> tensor<2x2xf32> {
+func @QuantizeDenseFloatConst() -> tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>> {
  %0 = constant dense<[[-0.1, 1.0], [1.0, 3.0]]> : tensor<2x2xf32>
  %1 = "tfl.quantize"(%0) {qtype = tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>
-  %2 = "tfl.dequantize"(%1) : (tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>) -> tensor<2x2xf32>
-  return %2 : tensor<2x2xf32>
+  return %1 : tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>

 // CHECK:  %[[cst:.*]] = "tfl.pseudo_qconst"() {qtype = tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>, value = dense<{{\[\[}}0, -1], {{\[}}-1, -1]]> : tensor<2x2xi8>}
-// CHECK:  %[[dq:.*]] = "tfl.dequantize"(%[[cst]])
-// CHECK:  return %[[dq]] : tensor<2x2xf32>
+// CHECK:  return %[[cst]]
 }

 // CHECK-LABEL: QuantizeSplatFloatConst
-func @QuantizeSplatFloatConst() -> tensor<2x2xf32> {
+func @QuantizeSplatFloatConst() -> tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>> {
  %0 = constant dense<3.0> : tensor<2x2xf32>
  %1 = "tfl.quantize"(%0) {qtype = tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>
+  return %1 : tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>
+
+// CHECK:  %[[cst:.*]] = "tfl.pseudo_qconst"() {qtype = tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>, value = dense<-1> : tensor<2x2xi8>}
+// CHECK:  return %[[cst]]
+}
+
+// CHECK-LABEL: NotQuantizeFloatConst
+func @NotQuantizeFloatConst() -> tensor<2x2xf32> {
+  %0 = constant dense<-0.1> : tensor<2x2xf32>
+  %1 = "tfl.quantize"(%0) {qtype = tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>} : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>
  %2 = "tfl.dequantize"(%1) : (tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>) -> tensor<2x2xf32>
  return %2 : tensor<2x2xf32>

-// CHECK:  %[[cst:.*]] = "tfl.pseudo_qconst"() {qtype = tensor<2x2x!quant.uniform<u8:f32, 7.8431372549019615E-4:128>>, value = dense<-1> : tensor<2x2xi8>}
-// CHECK:  %[[dq:.*]] = "tfl.dequantize"(%[[cst]])
-// CHECK:  return %[[dq]] : tensor<2x2xf32>
+// CHECK:  %[[cst:.*]] = constant dense<-1.000000e-01> : tensor<2x2xf32>
+// CHECK:  return %[[cst]] : tensor<2x2xf32>
 }

 // CHECK-LABEL: DequantizeAndQuantize

--- a/tensorflow/compiler/mlir/lite/transforms/quantize_patterns.td
+++ b/tensorflow/compiler/mlir/lite/transforms/quantize_patterns.td
@@ -21,12 +21,20 @@ include "tensorflow/compiler/mlir/lite/ir/tfl_ops.td"

 // Quantize attribute $0 by using quantization parameter from %1.
 def QuantizeByQuantizedType : NativeCodeCall<"quant::Quantize($0, $1.getValue())">;
+def F32ElementsAttr : ElementsAttrBase<
+  CPred<"$_self.cast<ElementsAttr>().getType().getElementType().isF32()">, "float constant tensor">;

 // Squash tfl.dequantize and tfl.quantize pairs.
 // TODO(fengliuai): Compare the scale of input and output. This can also be
 // squashed to a requantize op if the scales are different.
 def : Pat<(TFL_QuantizeOp (TFL_DequantizeOp $in), $qt), (replaceWithValue $in)>;

+// If the tfl.dequantize op wasn't fused, we shouldn't quantize the floating
+// point constant.
+def : Pat<(TFL_DequantizeOp
+             (TFL_QuantizeOp (ConstantOp F32ElementsAttr:$cst), $qt)),
+          (ConstantOp $cst)>;
+
 // Quantize the value of a constant op if the quantization parameters have been
 // propagated to the output.
 def : Pat<(TFL_QuantizeOp