From 806832e09163500fa01b8e9eabb871424dc26dbd Mon Sep 17 00:00:00 2001
From: Zhen Wang
Date: Tue, 5 Mar 2019 20:15:41 +0800
Subject: [PATCH] update the input format of channel wise dequantize op.

---
 paddle/fluid/operators/fake_dequantize_op.cc  | 42 ++++++++-----------
 paddle/fluid/operators/fake_dequantize_op.h   | 38 +++++++----------
 .../unittests/test_fake_dequantize_op.py      | 27 ++++++------
 3 files changed, 46 insertions(+), 61 deletions(-)

diff --git a/paddle/fluid/operators/fake_dequantize_op.cc b/paddle/fluid/operators/fake_dequantize_op.cc
index 73ffaae6a5..68c7227e5a 100644
--- a/paddle/fluid/operators/fake_dequantize_op.cc
+++ b/paddle/fluid/operators/fake_dequantize_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/fake_dequantize_op.h"
 #include <string>
+#include <vector>
 
 namespace paddle {
 namespace operators {
@@ -84,8 +85,8 @@ class FakeChannelWiseDequantizeMaxAbsOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(
         ctx->HasInput("X"),
         "Input(X) of FakeChannelWiseDequantizeMaxAbsOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("WeightScales"),
-                   "Input(WeightScales) of FakeChannelWiseDequantizeMaxAbsOp "
+    PADDLE_ENFORCE(ctx->HasInputs("Scales"),
+                   "Input(Scales) of FakeChannelWiseDequantizeMaxAbsOp "
                    "should not be null.");
     PADDLE_ENFORCE(
         ctx->HasOutput("Out"),
@@ -103,39 +104,32 @@ class FakeChannelWiseDequantizeMaxAbsOpMaker
     AddInput("X",
              "(Tensor) The input with float-32/64 type is the "
              "low precision tensor.");
-    AddInput("ActivationScale",
-             "(float) The activation scale in quantization stage.")
-        .AsDispensable();
-    AddInput("WeightScales",
-             "(float array) The weight scales in quantization stage.");
+    AddInput("Scales",
+             "(Tensors) The scales in quantization stage. "
+             "Now, `Scales` is a vector with at most two tensors. "
+             "If Scales has two elements, the second tensor should only have "
+             "one value.")
+        .AsDuplicable();
     AddOutput("Out",
               "(Tensor) The output is the dequantized high "
               "precision tensor.");
-    AddAttr<int>("activation_bits", "Quantization bit number for activation.")
-        .SetDefault(8)
-        .AddCustomChecker([](const int& bit_length) {
-          PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16,
-                         "'activation_bits' should be between 1 and 16.");
-        });
-    AddAttr<int>("weight_bits", "Quantization bit number for weights.")
-        .SetDefault(8)
-        .AddCustomChecker([](const int& bit_length) {
-          PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16,
-                         "'weight_bits' should be between 1 and 16.");
-        });
+    AddAttr<std::vector<int>>(
+        "quant_bits",
+        "Quantization bit numbers in quantization stage. "
+        "The size of `quant_bits` should be equal to the size of `Scales`.")
+        .SetDefault({8});
     AddComment(R"DOC(
 FakeChannelWiseDequantizeMaxAbsOp operator.
 
 This calculation is an opposite operation of FakeChannelWiseQuantizeMaxAbsOp:
 
-$$Out_c = \frac{ActivationScale*WeightScale_c*X_c}{(2^{weight\_bits-1}-1)*(2^{activation\_bits-1}-1)}$$
+$$Out_c = \frac{X_c\prod_{i=1}^{n}Scales_{ic}}{\prod_{i=1}^{n}(2^{quant\_bits_i-1}-1)}$$
 
-In the above formula, the range value of c is as follow:
-$$0 \leq c \lt \ the\ channel\ number\ of\ X$$
+In the above formula, the range of $c$ is $0 \leq c \lt \ the\ channel\ number\ of\ X$.
+Besides, the size of $quant\_bits$ should be equal to the size of $Scales$, and it is denoted by $n$ in the formula.
 
-Notes: Tha per-channel quantization is only applied to weights(channel size scale).
-And the activations use per-layer quantization(only one scale).
+Notes: In general, per-channel quantization is only applied to weights, while activations use per-layer quantization.
 )DOC");
   }
 };
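To make the documented formula concrete, here is a minimal numeric sketch in
numpy; the input values, scales, and bit widths below are made up for
illustration and are not part of the patch:

    import numpy as np

    quant_bits = [8, 2]
    ranges = [2 ** (b - 1) - 1 for b in quant_bits]        # [127, 1]

    xq = np.array([[127, -64], [32, 8]], dtype="float32")  # low-precision X, 2 channels
    scales0 = np.array([0.8, 0.5], dtype="float32")        # one scale per channel of X
    scales1 = np.float32(4.0)                              # second scale: a single value

    # Out_c = X_c * Scales0_c * Scales1 / ((2^7 - 1) * (2^1 - 1))
    out = xq * scales0[:, None] * scales1 / (ranges[0] * ranges[1])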
diff --git a/paddle/fluid/operators/fake_dequantize_op.h b/paddle/fluid/operators/fake_dequantize_op.h
index c26dfa8332..549f5039f4 100644
--- a/paddle/fluid/operators/fake_dequantize_op.h
+++ b/paddle/fluid/operators/fake_dequantize_op.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 
+#include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 
@@ -50,47 +51,40 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel<T> {
  public:
   virtual void Compute(const framework::ExecutionContext& ctx) const {
     auto* in = ctx.Input<framework::Tensor>("X");
-    auto* weight_scales = ctx.Input<framework::Tensor>("WeightScales");
+    auto scales = ctx.MultiInput<framework::Tensor>("Scales");
     auto* out = ctx.Output<framework::Tensor>("Out");
 
-    PADDLE_ENFORCE_EQ(weight_scales->numel(), in->dims()[0],
-                      "The weight uses the per-channel quantization type, so "
-                      "the number of weight scale values must be the same with "
+    PADDLE_ENFORCE_EQ(scales[0]->numel(), in->dims()[0],
+                      "The number of first scale values must be the same with "
                       "first dimension value of Input(X).");
 
-    int ativation_bits = ctx.Attr<int>("activation_bits");
-    int weight_bits = ctx.Attr<int>("weight_bits");
-    int range = std::pow(2, weight_bits - 1) - 1;
+    auto quant_bits = ctx.Attr<std::vector<int>>("quant_bits");
+    int max_range = std::pow(2, quant_bits[0] - 1) - 1;
 
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     out->mutable_data<T>(dev_ctx.GetPlace());
 
     auto dequant = DequantizeFunctor<DeviceContext, T>();
-    if (ctx.HasInput("ActivationScale")) {
-      auto* activation_scale = ctx.Input<framework::Tensor>("ActivationScale");
-      PADDLE_ENFORCE_EQ(activation_scale->numel(), 1,
-                        "The activation uses per-layer quantization type, so "
-                        "it must have only one value.");
-      framework::Tensor cpu_weigth_scales;
-      framework::TensorCopy(*weight_scales, platform::CPUPlace(),
-                            &cpu_weigth_scales);
-      dev_ctx.Wait();
-      const T* weight_scales_data = cpu_weigth_scales.data<T>();
-      range *= (std::pow(2, ativation_bits - 1) - 1);
+    if (scales.size() == 2) {
+      PADDLE_ENFORCE_EQ(
+          scales[1]->numel(), 1,
+          "The second scale tensor should only have one value for now.");
+      max_range *= (std::pow(2, quant_bits[1] - 1) - 1);
       for (int64_t i = 0; i < in->dims()[0]; i++) {
         framework::Tensor one_channel_in = in->Slice(i, i + 1);
         framework::Tensor one_channel_out = out->Slice(i, i + 1);
-        auto max_range = range / weight_scales_data[i];
-        dequant(dev_ctx, &one_channel_in, activation_scale,
+        framework::Tensor one_channel_scale = scales[0]->Slice(i, i + 1);
+        dequant(dev_ctx, &one_channel_in, &one_channel_scale,
                 static_cast<T>(max_range), &one_channel_out);
       }
+      dequant(dev_ctx, out, scales[1], static_cast<T>(1), out);
     } else {
       for (int64_t i = 0; i < in->dims()[0]; i++) {
         framework::Tensor one_channel_in = in->Slice(i, i + 1);
         framework::Tensor one_channel_out = out->Slice(i, i + 1);
-        framework::Tensor one_channel_scale = weight_scales->Slice(i, i + 1);
+        framework::Tensor one_channel_scale = scales[0]->Slice(i, i + 1);
         dequant(dev_ctx, &one_channel_in, &one_channel_scale,
-                static_cast<T>(range), &one_channel_out);
+                static_cast<T>(max_range), &one_channel_out);
       }
     }
   }
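For reference, the Compute logic above reduces to the following numpy sketch
(illustrative only; channel_wise_dequant is not a Paddle API, and its two
division steps are numerically equivalent to folding both bit-width ranges
into max_range as the kernel does):

    import numpy as np

    def channel_wise_dequant(x, scales, quant_bits):
        # Per-channel scales broadcast over axis 0, like the Slice loop.
        bshape = (-1,) + (1,) * (x.ndim - 1)
        out = x * scales[0].reshape(bshape) / (2 ** (quant_bits[0] - 1) - 1)
        if len(scales) == 2:
            # Fold in the single per-layer scale and its bit-width range,
            # like the final dequant(dev_ctx, out, scales[1], 1, out) call.
            assert scales[1].size == 1
            out = out * scales[1] / (2 ** (quant_bits[1] - 1) - 1)
        return out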
diff --git a/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py
index bd8dad4d59..8d91d8fd1d 100644
--- a/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py
@@ -49,53 +49,50 @@ def channel_wise_dequantize_max_abs(x, scales, max_range):
     return y
 
 
-class TestFakeChannelWiseDequantizeMaxAbsOp(OpTest):
+class TestFakeChannelWiseDequantizeMaxAbsOpTwoScales(OpTest):
     def set_args(self):
-        self.weight_bits = 8
-        self.activation_bits = 2
+        self.quant_bits = [8, 2]
         self.data_type = "float32"
 
     def setUp(self):
         self.set_args()
         self.op_type = "fake_channel_wise_dequantize_max_abs"
         x = np.random.randn(4, 3, 64, 64).astype(self.data_type)
-        max_range = math.pow(2, self.weight_bits - 1) - 1
+        max_range = math.pow(2, self.quant_bits[0] - 1) - 1
+        max_range *= (math.pow(2, self.quant_bits[1] - 1) - 1)
         yq, scales = channel_wise_quantize_max_abs(x, max_range)
         ydq = channel_wise_dequantize_max_abs(yq, scales, max_range)
 
         self.inputs = {
             'X': yq,
-            'ActivationScale': np.array(1.0).astype(self.data_type),
-            'WeightScales': np.array(scales).astype(self.data_type)
-        }
-        self.attrs = {
-            'weight_bits': self.weight_bits,
-            'activation_bits': self.activation_bits
+            'Scales': [("scales0", np.array(scales).astype(self.data_type)),
+                       ("scales1", np.array([1.0]).astype(self.data_type))]
         }
+        self.attrs = {'quant_bits': self.quant_bits}
         self.outputs = {'Out': ydq}
 
     def test_check_output(self):
         self.check_output()
 
 
-class TestFakeChannelWiseDequantizeMaxAbsOpNoActivationScale(OpTest):
+class TestFakeChannelWiseDequantizeMaxAbsOpOneScale(OpTest):
     def set_args(self):
-        self.weight_bits = 8
+        self.quant_bits = [8]
         self.data_type = "float32"
 
     def setUp(self):
         self.set_args()
         self.op_type = "fake_channel_wise_dequantize_max_abs"
         x = np.random.randn(4, 3, 64, 64).astype(self.data_type)
-        max_range = math.pow(2, self.weight_bits - 1) - 1
+        max_range = math.pow(2, self.quant_bits[0] - 1) - 1
        yq, scales = channel_wise_quantize_max_abs(x, max_range)
         ydq = channel_wise_dequantize_max_abs(yq, scales, max_range)
 
         self.inputs = {
             'X': yq,
-            'WeightScales': np.array(scales).astype(self.data_type)
+            'Scales': [("scales0", np.array(scales).astype(self.data_type))]
         }
-        self.attrs = {'weight_bits': self.weight_bits}
+        self.attrs = {'quant_bits': self.quant_bits}
         self.outputs = {'Out': ydq}
 
     def test_check_output(self):
-- 
GitLab
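As a closing note, the tests above check the op's output for exact equality
with the numpy reference helpers. A related sanity check (hypothetical, not
part of the patch) is that one quantize/dequantize round trip stays within
half a quantization step per channel:

    import numpy as np

    x = np.random.randn(4, 3, 64, 64).astype("float32")
    r = 2 ** (8 - 1) - 1                              # quant_bits = [8]

    scales = np.abs(x).reshape(4, -1).max(axis=1)     # per-channel max-abs
    bshape = (-1, 1, 1, 1)                            # broadcast over channels
    xq = np.round(x / scales.reshape(bshape) * r)     # fake-quantized values
    xdq = xq * scales.reshape(bshape) / r             # dequantized output

    # Rounding moves each value by at most half a step, i.e. scale / (2 * r).
    assert np.all(np.abs(x - xdq) <= scales.reshape(bshape) / (2 * r) + 1e-6)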