Add support for (de/re)quantization with shift (#27481)

42d17538 · Wojciech Uss · GitHub · 8da2b16d · 42d17538 · 42d17538
9 changed files
--- a/paddle/fluid/operators/dequantize_op.cc
+++ b/paddle/fluid/operators/dequantize_op.cc
@@ -31,9 +31,10 @@ framework::OpKernelType DeQuantOp::GetExpectedKernelType(
 }
 void DeQuantOpMaker::Make() {
-  AddInput("Input", "input data");
+  AddInput("Input", "Input data");
-  AddOutput("Output", "output data");
+  AddOutput("Output", "Output data");
-  AddAttr<float>("Scale", "scale data").SetDefault({1.0f});
+  AddAttr<float>("Scale", "Scale data").SetDefault({1.0f});
+  AddAttr<float>("Shift", "Shift data").SetDefault({0.0f});
  AddComment(R"DOC(This op will dequantize data from INT8 to FP32)DOC");
 }

--- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_layout_transform.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/dequantize_op.h"
+#include "paddle/fluid/platform/errors.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
@@ -37,14 +38,29 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("Input");
    auto scale_data = ctx.Attr<float>("Scale");
+    auto scale_shift = ctx.Attr<float>("Shift");
+    bool with_shift = scale_shift != 0.0f;
    auto* output = ctx.Output<Tensor>("Output");
+    PADDLE_ENFORCE_NE(scale_data, 0.0f,
+                      platform::errors::InvalidArgument(
+                          "Dequantization scale cannot be 0.0"));
+    PADDLE_ENFORCE_GE(scale_shift, 0,
+                      platform::errors::Unimplemented(
+                          "Dequantization shift must be nonnegative."));
+    PADDLE_ENFORCE_LE(
+        scale_shift, 255,
+        platform::errors::Unimplemented(
+            "Dequantization shift must be less than or equal to 255."));
    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& engine = dev_ctx.GetEngine();
    const T* input_data = input->data<T>();
    float* output_data = output->mutable_data<float>(ctx.GetPlace());
-    std::vector<float> reorder_scale = {1.0f / scale_data};
+    float reorder_shift = -scale_shift / scale_data;
    auto src_tz = paddle::framework::vectorize<int64_t>(input->dims());
    auto dst_tz = paddle::framework::vectorize<int64_t>(output->dims());
@@ -65,7 +81,15 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
    if (reorder_p == nullptr) {
      mkldnn::primitive_attr attri;
      int mask = 0;
-      attri.set_output_scales(mask, reorder_scale);
+      float reorder_scale = 1. / scale_data;
+      attri.set_output_scales(mask, {reorder_scale});
+      if (with_shift) {
+        mkldnn::post_ops post_operations;
+        post_operations.append_sum();
+        attri.set_post_ops(post_operations);
+        std::fill(output_data, output_data + output->numel(), reorder_shift);
+      }
      auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
      src_memory = std::make_shared<mkldnn::memory>(
@@ -92,6 +116,8 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
      dst_memory = std::static_pointer_cast<mkldnn::memory>(
          dev_ctx.GetBlob(key_dst_mem));
+      if (with_shift)
+        std::fill(output_data, output_data + output->numel(), reorder_shift);
      dst_memory->set_data_handle(output->mutable_data<float>(ctx.GetPlace()));
    }

--- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
@@ -36,7 +36,21 @@ class QuantOpKernel : public framework::OpKernel<T> {
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("Input");
    auto scale_data = ctx.Attr<float>("Scale");
+    auto scale_shift = ctx.Attr<float>("Shift");
+    bool with_shift = scale_shift != 0.0f;
    auto* output = ctx.Output<Tensor>("Output");
+    PADDLE_ENFORCE_NE(
+        scale_data, 0.0f,
+        platform::errors::InvalidArgument("Quantization scale cannot be 0.0"));
+    PADDLE_ENFORCE_GE(scale_shift, 0,
+                      platform::errors::Unimplemented(
+                          "Quantization shift must be nonnegative."));
+    PADDLE_ENFORCE_LE(
+        scale_shift, 255,
+        platform::errors::Unimplemented(
+            "Quantization shift must be less than or equal to 255."));
    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& engine = dev_ctx.GetEngine();
@@ -47,11 +61,12 @@ class QuantOpKernel : public framework::OpKernel<T> {
    const T* input_data = input->data<T>();
-    bool is_negative = ctx.Attr<bool>("is_negative_input");
+    bool is_negative_input = ctx.Attr<bool>("is_negative_input");
    bool bfloat16 = ctx.Attr<bool>("bfloat16");
-    std::string key =
-        platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_data,
+    std::string key = platform::CreateKey(
-                            is_negative, ctx.OutputName("Output"));
+        platform::ThreadIDasStr(), src_tz, scale_data, scale_shift,
+        is_negative_input, ctx.OutputName("Output"));
    const std::string key_prim = key + "@r";
    const std::string key_src_mem = key + "@s";
    const std::string key_dst_mem = key + "@d";
@@ -69,6 +84,15 @@ class QuantOpKernel : public framework::OpKernel<T> {
      int mask = 0;
      attri.set_output_scales(mask, {scale_data});
+      if (with_shift) {
+        mkldnn::post_ops post_operations;
+        post_operations.append_sum();
+        attri.set_post_ops(post_operations);
+        uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+        // memset casts scale_shift to unsigned char (uint8_t) internally
+        std::memset(output_data, scale_shift, output->numel());
+      }
      auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
                                            input->format());
      src_memory = std::make_shared<mkldnn::memory>(
@@ -78,7 +102,7 @@ class QuantOpKernel : public framework::OpKernel<T> {
      if (bfloat16) {
        platform::SetDstMemoryQuantized<paddle::platform::bfloat16>(
            ctx, output, dst_tz, engine, dst_md, dst_memory, out_format);
-      } else if (is_negative) {
+      } else if (is_negative_input && !with_shift) {
        platform::SetDstMemoryQuantized<int8_t>(ctx, output, dst_tz, engine,
                                                dst_md, dst_memory, out_format);
      } else {
@@ -104,10 +128,13 @@ class QuantOpKernel : public framework::OpKernel<T> {
      if (bfloat16) {
        dst_memory->set_data_handle(
            output->mutable_data<paddle::platform::bfloat16>(place));
-      } else if (is_negative) {
+      } else if (with_shift || !is_negative_input) {
-        dst_memory->set_data_handle(output->mutable_data<int8_t>(place));
+        uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+        if (with_shift) std::memset(output_data, scale_shift, output->numel());
+        dst_memory->set_data_handle(output_data);
      } else {
-        dst_memory->set_data_handle(output->mutable_data<uint8_t>(place));
+        dst_memory->set_data_handle(
+            output->mutable_data<int8_t>(ctx.GetPlace()));
      }
    }

--- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
@@ -26,20 +26,45 @@ using dnnl::reorder;
 using platform::to_void_cast;
 using Tensor = framework::Tensor;
+namespace {
+// Rounds x to the nearest integer and saturates the result to [0, 255].
+// The value is clamped before std::lround is called: rounding a float that
+// does not fit in long (or a NaN) gives an unspecified result, so clamping the
+// rounded value afterwards is not reliable. With this order NaN maps to 0.
+inline uint8_t clip_to_uint8(float x) {
+  const float saturated = std::min(255.0f, std::max(0.0f, x));
+  return static_cast<uint8_t>(std::lround(saturated));
+}
+}  // namespace
 template <typename T>
 class ReQuantOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* input = ctx.Input<Tensor>("Input");
    auto scale_in = ctx.Attr<float>("Scale_in");
+    auto shift_in = ctx.Attr<float>("Shift_in");
    auto scale_out = ctx.Attr<float>("Scale_out");
+    auto shift_out = ctx.Attr<float>("Shift_out");
+    bool with_shift = shift_in != 0.0f || shift_out != 0.0f;
    auto* output = ctx.Output<Tensor>("Output");
+    PADDLE_ENFORCE_NE(scale_in, 0.0f, platform::errors::InvalidArgument(
+                                          "Scale of input cannot be 0.0"));
+    PADDLE_ENFORCE_NE(scale_out, 0.0f, platform::errors::InvalidArgument(
+                                           "Scale of output cannot be 0.0"));
+    if (shift_in != 0.0f) {
+      PADDLE_ENFORCE_EQ(
+          input->type(), framework::proto::VarType::UINT8,
+          platform::errors::Unimplemented("Requantize does not support nonzero "
+                                          "shift for signed input."));
+    }
    auto& dev_ctx =
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& engine = dev_ctx.GetEngine();
    auto src_tz = paddle::framework::vectorize(input->dims());
+    float reorder_scale = scale_out / scale_in;
    std::string key =
        platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_in,
                            scale_out, ctx.OutputName("Output"));
@@ -53,28 +78,37 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
    reorder_p = std::static_pointer_cast<reorder>(dev_ctx.GetBlob(key_prim));
    const T* input_data = input->data<T>();
-    T* output_data = output->mutable_data<T>(ctx.GetPlace());
    if (reorder_p == nullptr) {
-      dnnl::primitive_attr attri;
+      auto dst_tz = framework::vectorize(output->dims());
-      int mask = 0;
+      auto src_dt = framework::ToMKLDNNDataType(input->type());
-      float scale_shift = scale_out / scale_in;
+      auto dst_dt = with_shift ? framework::MKLDNNDataType::u8 : src_dt;
-      attri.set_output_scales(mask, {scale_shift});
-      auto dst_tz = paddle::framework::vectorize(output->dims());
-      dnnl::memory::data_type src_dt =
-          paddle::framework::ToMKLDNNDataType(input->type());
-      dnnl::memory::data_type dst_dt = src_dt;
      auto src_md =
          platform::MKLDNNMemDesc({src_tz}, src_dt, MKLDNNMemoryFormat::nhwc);
      src_memory = std::make_shared<dnnl::memory>(src_md, engine,
                                                  to_void_cast<T>(input_data));
      auto dst_md =
          platform::MKLDNNMemDesc({dst_tz}, dst_dt, MKLDNNMemoryFormat::nhwc);
-      dst_memory = std::make_shared<dnnl::memory>(dst_md, engine,
-                                                  to_void_cast<T>(output_data));
+      dnnl::primitive_attr attri;
+      int mask = 0;
+      attri.set_output_scales(mask, {reorder_scale});
+      if (with_shift) {
+        mkldnn::post_ops post_operations;
+        post_operations.append_sum();
+        attri.set_post_ops(post_operations);
+        uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+        uint8_t reorder_shift =
+            clip_to_uint8(shift_out - reorder_scale * shift_in);
+        std::memset(output_data, reorder_shift, output->numel());
+        dst_memory = std::make_shared<dnnl::memory>(
+            dst_md, engine, to_void_cast<uint8_t>(output_data));
+      } else {
+        T* output_data = output->mutable_data<T>(ctx.GetPlace());
+        dst_memory = std::make_shared<dnnl::memory>(
+            dst_md, engine, to_void_cast<T>(output_data));
+      }
      auto reorder_pd =
          reorder::primitive_desc(*src_memory, *dst_memory, attri);
@@ -90,7 +124,17 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
      dst_memory =
          std::static_pointer_cast<dnnl::memory>(dev_ctx.GetBlob(key_dst_mem));
-      dst_memory->set_data_handle(output_data);
+      if (with_shift) {
+        uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+        uint8_t reorder_shift =
+            clip_to_uint8(shift_out - reorder_scale * shift_in);
+        std::memset(output_data, reorder_shift, output->numel());
+        dst_memory->set_data_handle(output_data);
+      } else {
+        T* output_data = output->mutable_data<T>(ctx.GetPlace());
+        dst_memory->set_data_handle(output_data);
+      }
    }
    dnnl::stream astream(engine);

--- a/paddle/fluid/operators/quantize_op.cc
+++ b/paddle/fluid/operators/quantize_op.cc
@@ -31,12 +31,16 @@ framework::OpKernelType QuantOp::GetExpectedKernelType(
 }
 void QuantOpMaker::Make() {
-  AddInput("Input", "input data");
+  AddInput("Input", "Input data");
-  AddOutput("Output", "output data");
+  AddOutput("Output", "Output data");
  AddAttr<bool>("is_negative_input",
                "(bool, default false) Only used in mkldnn INT8 kernel")
      .SetDefault(false);
-  AddAttr<float>("Scale", "scale data").SetDefault({1.0f});
+  AddAttr<float>("Scale", "Scale data").SetDefault({1.0f});
+  AddAttr<float>(
+      "Shift",
+      "Shift data. When Shift is non-zero, data is quantized to unsigned int8.")
+      .SetDefault({0.0f});
  AddAttr<std::string>("output_format",
                       "Convert format to NHWC or NCHW during quantization.")
      .SetDefault("NHWC");

--- a/paddle/fluid/operators/requantize_op.cc
+++ b/paddle/fluid/operators/requantize_op.cc
@@ -31,10 +31,12 @@ framework::OpKernelType ReQuantOp::GetExpectedKernelType(
 }
 void ReQuantOpMaker::Make() {
-  AddInput("Input", "input data");
+  AddInput("Input", "Input data");
-  AddOutput("Output", "output data");
+  AddOutput("Output", "Output data");
-  AddAttr<float>("Scale_in", "scale in data").SetDefault({1.0f});
+  AddAttr<float>("Scale_in", "Scale in data").SetDefault({1.0f});
-  AddAttr<float>("Scale_out", "scale out data").SetDefault({1.0f});
+  AddAttr<float>("Scale_out", "Scale out data").SetDefault({1.0f});
+  AddAttr<float>("Shift_in", "Shift in data").SetDefault({1.0f});
+  AddAttr<float>("Shift_out", "Shift out data").SetDefault({1.0f});
  AddComment(
      R"DOC(This op will re-quantize data from INT8 with scale_in to INT8 with scale_out)DOC");
 }

--- a/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py
@@ -22,40 +22,69 @@ from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16
 class TestDeQuantizeOp(OpTest):
    def setUp(self):
        self.op_type = 'dequantize'
-        self.scale = 2.0
+        self.scale = 127.0
-        self.input_size = [1, 1, 5, 5]  #Naive nChw16c
+        self.shift = 0.0
+        self.input_size = [1, 1, 5, 5]  # Naive nChw16c
        self.data_type = 'int8'
        self.set_scale()
+        self.set_shift()
        self.set_data_type()
+        self.set_input_size()
+        if self.data_type == 'uint16':
+            self.prepare_input_output_bf16()
+        else:
+            self.prepare_input_int8()
+            self.prepare_output_int8()
+    def prepare_input_output_bf16(self):
+        output = np.random.random(self.input_size).astype(np.float32)
+        input = convert_float_to_uint16(output)
+        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
+        self.outputs = {'Output': output}
+    def prepare_input_int8(self):
        if self.data_type == 'int8':
-            input = (np.random.randint(0, 100, self.input_size) - 50
+            # input data values are integers from interval [-128, 128)
-                     ).astype(self.data_type)
+            self.input = (np.random.randint(0, 256, self.input_size) - 128
-            output = (input * (1 / self.scale)).astype('float')
+                          ).astype(self.data_type)
-        elif self.data_type == 'uint16':
-            output = np.random.random(self.input_size).astype(np.float32)
-            input = convert_float_to_uint16(output)
        else:
-            input = (np.random.randint(0, 100,
+            # input data values are integers from interval [0, 256)
-                                       self.input_size)).astype(self.data_type)
+            self.input = (np.random.randint(
-            output = (input * (1 / self.scale)).astype('float')
+                0, 256, self.input_size)).astype(self.data_type)
-        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
+        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)}
+        self.attrs = {'Scale': self.scale, 'Shift': self.shift}
+    def prepare_output_int8(self):
+        output = (self.input / self.scale -
+                  (self.shift / self.scale)).astype('float')
        self.outputs = {'Output': output}
-        self.attrs = {'Scale': self.scale, }
    def test_check_output(self):
        # TODO(wangzhongpu): support mkldnn op in dygraph mode
        self.check_output(check_dygraph=False)
+    def check_raise_error(self, msg):
+        try:
+            self.check_output()
+        except Exception as e:
+            if msg in str(e):
+                raise AttributeError
+            else:
+                print(e)
    def set_scale(self):
        pass
+    def set_shift(self):
+        pass
    def set_data_type(OpTest):
        pass
+    def set_input_size(self):
+        pass
 class TestDeQuantizeOp1(TestDeQuantizeOp):
    def set_scale(self):
@@ -81,5 +110,95 @@ class TestDeQuantizeOpBf16(TestDeQuantizeOp):
        self.data_type = 'uint16'
+class TestDeQuantizeOp_ZeroScale(TestDeQuantizeOp):
+    def set_scale(self):
+        self.scale = 0.0
+    def prepare_output_int8(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Dequantization scale cannot be 0.0')
+# 2-dim input
+# P - positive input, with shift
+class TestDeQuantizeOpShift_2_P(TestDeQuantizeOp):
+    def set_data_type(self):
+        self.data_type = 'uint8'
+    def set_scale(self):
+        self.scale = 255.0
+    def set_shift(self):
+        self.shift = 128.0
+    def set_input_size(self):
+        self.input_size = [2, 3]
+# 2-dim input
+# N - negative input, with shift
+class TestDeQuantizeOpShift_2_N(TestDeQuantizeOpShift_2_P):
+    def set_data_type(self):
+        self.data_type = 'int8'
+    def set_scale(self):
+        self.scale = 127.0
+    def set_shift(self):
+        self.shift = 10.0
+    def set_input_size(self):
+        self.input_size = [2, 3]
+# 3-dim input
+class TestDeQuantizeOpShift_3_P(TestDeQuantizeOpShift_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+class TestDeQuantizeOpShift_3_N(TestDeQuantizeOpShift_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+# 4-dim input
+class TestDeQuantizeOpShift_4_P(TestDeQuantizeOpShift_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+class TestDeQuantizeOpShift_4_N(TestDeQuantizeOpShift_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+class TestDeQuantizeOp_NegativeShift(TestDeQuantizeOp):
+    def set_shift(self):
+        self.shift = -10.0
+    def prepare_output_int8(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Dequantization shift must be nonnegative.')
+class TestDeQuantizeOp_TooBigShift(TestDeQuantizeOp_NegativeShift):
+    def set_shift(self):
+        self.shift = 300.0
+    def test_check_output(self):
+        self.assertRaises(
+            AttributeError, self.check_raise_error,
+            'Dequantization shift must be less than or equal to 255.')
 if __name__ == '__main__':
    unittest.main()
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py
@@ -22,44 +22,75 @@ from paddle.fluid.tests.unittests.op_test import OpTest
 class TestQuantizeOp(OpTest):
    def setUp(self):
        self.op_type = 'quantize'
-        self.scale = 2.0
+        self.scale = 255.0
-        self.input_size = [1, 1, 5, 5]  #Naive nChw16c
+        self.shift = 0.0
+        self.input_size = [1, 1, 5, 5]  # Naive nChw16c
        self.is_negative = False
+        self.output_format = 'NCHW'
        self.set_scale()
+        self.set_shift()
        self.set_is_negative()
+        self.set_input_size()
+        self.set_output_format()
+        self.prepare_input()
+        self.prepare_output()
+    def prepare_input(self):
        if self.is_negative:
-            input = (100 * np.random.random_sample(self.input_size) - 50
+            # input data values are from interval [-1.0, 1.0)
-                     ).astype('float32')
+            self.input = (2 * np.random.random_sample(self.input_size) - 1
-            output = np.round(input * self.scale).astype('int8')
+                          ).astype('float32')
        else:
-            input = (100 *
+            # input data values are from interval [0.0, 1.0)
-                     np.random.random_sample(self.input_size)).astype('float32')
+            self.input = (
-            output = np.round(input * self.scale).astype('uint8')
+                np.random.random_sample(self.input_size)).astype('float32')
-        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
-        self.outputs = {'Output': output}
+        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)}
        self.attrs = {
            'Scale': self.scale,
-            'is_negative_input': self.is_negative
+            'Shift': self.shift,
+            'is_negative_input': self.is_negative,
+            'output_format': self.output_format
        }
+    def prepare_output(self):
+        input_data_type = 'int8' if self.is_negative else 'uint8'
+        output = np.rint(self.input * self.scale + self.shift).astype(
+            input_data_type)
+        self.outputs = {'Output': output}
    def test_check_output(self):
        # TODO(wangzhongpu): support mkldnn op in dygraph mode
        self.check_output(check_dygraph=False)
+    def check_raise_error(self, msg):
+        try:
+            self.check_output()
+        except Exception as e:
+            if msg in str(e):
+                raise AttributeError
+            else:
+                print(e)
    def set_scale(self):
        pass
+    def set_shift(self):
+        pass
    def set_is_negative(self):
        pass
+    def set_input_size(self):
+        pass
+    def set_output_format(self):
+        pass
 class TestQuantizeOp1(TestQuantizeOp):
+    """Quantize signed input: values from [-1, 1) scaled by 127, no shift."""
     def set_scale(self):
-        self.scale = 1.5
+        self.scale = 127.0
     def set_is_negative(self):
-        self.is_nagative = True
+        # setUp() and prepare_input()/prepare_output() read `self.is_negative`;
+        # the misspelled attribute left it False, so the signed path never ran.
+        self.is_negative = True
@@ -67,11 +98,137 @@ class TestQuantizeOp1(TestQuantizeOp):
 class TestQuantizeOp2(TestQuantizeOp):
    def set_scale(self):
-        self.scale = 0.1
+        self.scale = 255.0
+    def set_is_negative(self):
+        self.is_nagative = False
+class TestQuantizeOp_ZeroScale(TestQuantizeOp):
+    def set_scale(self):
+        self.scale = 0.0
+    def prepare_output(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Quantization scale cannot be 0.0')
+# 2-dim input
+# P - positive input
+class TestQuantizeOpShift_NCHW_2_P(TestQuantizeOp):
+    def set_output_format(self):
+        self.output_format = 'NCHW'
+    def set_is_negative(self):
+        self.is_nagative = False
+    def set_scale(self):
+        self.scale = 255.0
+    def set_shift(self):
+        self.shift = 0.0
+    def set_input_size(self):
+        self.input_size = [2, 3]
+# 2-dim input
+# N - negative input
+class TestQuantizeOpShift_NCHW_2_N(TestQuantizeOpShift_NCHW_2_P):
+    def set_is_negative(self):
+        self.is_nagative = True
+    def set_scale(self):
+        self.scale = 127.0
+    def set_shift(self):
+        self.shift = 128.0
+class TestQuantizeOpShift_NHWC_2_P(TestQuantizeOpShift_NCHW_2_P):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+class TestQuantizeOpShift_NHWC_2_N(TestQuantizeOpShift_NCHW_2_N):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+# 3-dim input
+class TestQuantizeOpShift_NCHW_3_P(TestQuantizeOpShift_NCHW_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+class TestQuantizeOpShift_NCHW_3_N(TestQuantizeOpShift_NCHW_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+class TestQuantizeOpShift_NHWC_3_P(TestQuantizeOpShift_NCHW_3_P):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+class TestQuantizeOpShift_NHWC_3_N(TestQuantizeOpShift_NCHW_3_N):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+# 4-dim input
+class TestQuantizeOpShift_NCHW_4_P(TestQuantizeOpShift_NCHW_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+class TestQuantizeOpShift_NCHW_4_N(TestQuantizeOpShift_NCHW_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+class TestQuantizeOpShift_NHWC_4_P(TestQuantizeOpShift_NCHW_4_P):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+class TestQuantizeOpShift_NHWC_4_N(TestQuantizeOpShift_NCHW_4_N):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+class TestQuantizeOp_NegativeShift(TestQuantizeOp):
    def set_is_negative(self):
        self.is_nagative = False
+    def set_scale(self):
+        self.scale = 100.0
+    def set_shift(self):
+        self.shift = -10.0
+    def prepare_output(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Quantization shift must be nonnegative.')
+class TestQuantizeOp_TooBigShift(TestQuantizeOp_NegativeShift):
+    def set_shift(self):
+        self.shift = 300.0
+    def test_check_output(self):
+        self.assertRaises(
+            AttributeError, self.check_raise_error,
+            'Quantization shift must be less than or equal to 255.')
 if __name__ == '__main__':
    unittest.main()
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py
@@ -25,88 +25,271 @@ from mkldnn_op_test import format_reorder
 class TestReQuantizeOp(OpTest):
    def setUp(self):
        self.op_type = 'requantize'
-        self.scale_in = 2.0
+        self.scale_in = 127.0
-        self.scale_out = 1.5
+        self.shift_in = 0.0
+        self.scale_out = 100.0
+        self.shift_out = 0.0
        self.input_size = [1, 1, 10, 10]
-        self.data_type = 'int8'
+        self.input_data_type = 'int8'
-        self.set_scale()
+        self.set_scales()
-        self.set_data_type()
+        self.set_shifts()
-        self.prepare_inputs()
+        self.set_input_data_type()
+        self.prepare_input()
-    def prepare_inputs(self):
+        self.prepare_output()
-        scale_shift = self.scale_out / self.scale_in
+    def prepare_input(self):
-        if self.data_type == 'int8':
+        if self.input_data_type == 'int8':
-            self.input = (np.random.randint(0, 100, self.input_size) - 50
+            # input data values are integers from interval [-128, 128)
-                          ).astype(self.data_type)
+            self.input = (np.random.randint(0, 256, self.input_size) - 128
-            output_tmp = np.round(self.input.astype('float32') *
+                          ).astype(self.input_data_type)
-                                  scale_shift).astype('int8')
        else:
+            # input data values are integers from interval [0, 256)
            self.input = (np.random.randint(
-                0, 100, self.input_size)).astype(self.data_type)
+                0, 256, self.input_size)).astype(self.input_data_type)
-            output_tmp = np.round(self.input.astype('float32') *
-                                  scale_shift).astype('uint8')
-        self.output = format_reorder(output_tmp, self.input_size)
        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)}
+        self.attrs = {
+            'Scale_in': self.scale_in,
+            'Scale_out': self.scale_out,
+            'Shift_in': self.shift_in,
+            'Shift_out': self.shift_out
+        }
-        self.outputs = {'Output': self.output}
+    def prepare_output(self):
+        scale_ratio = self.scale_out / self.scale_in
+        with_shift = (self.shift_in != 0.0 or self.shift_out != 0.0)
+        if with_shift or self.input_data_type == 'uint8':
+            dst_type = 'uint8'
+            type_min = 0
+            type_max = 255
+            new_shift = np.clip(
+                np.rint(self.shift_out - scale_ratio * self.shift_in), type_min,
+                type_max)
+        else:
+            dst_type = 'int8'
+            type_min = -128
+            type_max = 127
+            new_shift = 0
-        self.attrs = {'Scale_in': self.scale_in, 'Scale_out': self.scale_out}
+        output_tmp = np.clip(
+            np.rint(self.input.astype('float32') * scale_ratio + new_shift),
+            type_min, type_max).astype(dst_type)
+        self.output = format_reorder(output_tmp, self.input_size)
+        self.outputs = {'Output': self.output}
    def test_check_output(self):
        # TODO(wangzhongpu): support mkldnn op in dygraph mode
+        self.assertTrue(self.input_data_type == 'uint8' or self.shift_in == 0.0,
+                        'Input data must be unsigned if it has nonzero shift.')
        self.check_output(check_dygraph=False)
-    def set_scale(self):
+    def check_raise_error(self, msg):
+        try:
+            self.check_output()
+        except Exception as e:
+            if msg in str(e):
+                raise AttributeError
+            else:
+                print(e)
+    def set_scales(self):
        pass
-    def set_data_type(OpTest):
+    def set_shifts(self):
        pass
+    def set_input_data_type(OpTest):
+        pass
+# ---------------test requantize with s8 input, no shift--------------------
-#--------------------test requantize with s8 input--------------------
+class TestReQuantizeOp_S8_SameScales(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 127.0
+        self.scale_out = 127.0
-class TestReQuantizeOp1(TestReQuantizeOp):
+class TestReQuantizeOp_S8_DifferentScales_1(TestReQuantizeOp):
-    def set_scale(self):
+    def set_scales(self):
-        self.scale_in = 1.5
+        self.scale_in = 127.0
-        self.scale_out = 1.5
+        self.scale_out = 100.0
-class TestReQuantizeOp2(TestReQuantizeOp):
+class TestReQuantizeOp_S8_DifferentScales_2(TestReQuantizeOp):
-    def set_scale(self):
+    def set_scales(self):
-        self.scale_in = 0.1
+        self.scale_in = 100.0
-        self.scale_out = 0.2
+        self.scale_out = 127.0
-#--------------------test requantize with u8 input--------------------
+class TestReQuantizeOp_S8_ZeroInputScale(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 0.0
+        self.scale_out = 127.0
+    def prepare_output(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Scale of input cannot be 0.0')
-class TestReQuantizeOp3(TestReQuantizeOp1):
-    def set_data_type(self):
-        self.data_type = 'uint8'
+class TestReQuantizeOp_S8_ZeroOutputScale(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 127.0
+        self.scale_out = 0.0
-class TestReQuantizeOp4(TestReQuantizeOp2):
+    def prepare_output(self):
-    def set_data_type(self):
+        self.output = np.zeros(self.input_size)
-        self.data_type = 'uint8'
+        self.outputs = {'Output': self.output}
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Scale of output cannot be 0.0')
+# ---------------test requantize with u8 input, no shift--------------------
+class TestReQuantizeOp_U8_SameScales(TestReQuantizeOp_S8_SameScales):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
+class TestReQuantizeOp_U8_DifferentScales_1(
+        TestReQuantizeOp_S8_DifferentScales_1):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
+class TestReQuantizeOp_U8_DifferentScales_2(
+        TestReQuantizeOp_S8_DifferentScales_2):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
+# ---------------test requantize with s8 input, with shift------------------
+class TestReQuantizeOp_S8_WithShift(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 60.0
+        self.scale_out = 127.0
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+    def test_check_output(self):
+        self.assertRaises(
+            AttributeError, self.check_raise_error,
+            'Requantize does not support nonzero shift for signed input.')
-#-------------------test reused requantize op---------------------------
+class TestReQuantizeOp_S8_WithOutputShift(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 127.0
+        self.scale_out = 60.0
+    def set_shifts(self):
+        self.shift_in = 0.0
+        self.shift_out = 120.0
+# ---------------test requantize with u8 input, with shift------------------
+class TestReQuantizeOp_U8_SameScales_SameShift(TestReQuantizeOp_U8_SameScales):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+class TestReQuantizeOp_U8_SameScales_DifferentShift_1(
+        TestReQuantizeOp_U8_SameScales):
+    def set_shifts(self):
+        self.shift_in = 60.0
+        self.shift_out = 128.0
+class TestReQuantizeOp_U8_SameScales_DifferentShift_2(
+        TestReQuantizeOp_U8_SameScales):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 60.0
+class TestReQuantizeOp_U8_DifferentScales_1_SameShift(
+        TestReQuantizeOp_U8_DifferentScales_1):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+class TestReQuantizeOp_U8_DifferentScales_2_SameShift(
+        TestReQuantizeOp_U8_DifferentScales_2):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+class TestReQuantizeOp_U8_DifferentScales_1_DifferentShift_1(
+        TestReQuantizeOp_U8_DifferentScales_1):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 60.0
+class TestReQuantizeOp_U8_DifferentScales_2_DifferentShift_1(
+        TestReQuantizeOp_U8_DifferentScales_2):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 60.0
+class TestReQuantizeOp_U8_DifferentScales_1_DifferentShift_2(
+        TestReQuantizeOp_U8_DifferentScales_1):
+    def set_shifts(self):
+        self.shift_in = 60.0
+        self.shift_out = 128.0
+class TestReQuantizeOp_U8_DifferentScales_2_DifferentShift_2(
+        TestReQuantizeOp_U8_DifferentScales_2):
+    def set_shifts(self):
+        self.shift_in = 60.0
+        self.shift_out = 128.0
+# ---------------test reused requantize op, no shift------------------------
 class TestReQuantizeOpReused(TestReQuantizeOp):
    def setUp(self):
-        self.input_size = [1, 1, 10, 10]
+        #  self.input_size = [1, 1, 10, 10]
-        self.data_type = 'int8'
+        self.input_size = [1, 1, 2, 2]
-        self.set_scale()
+        self.input_data_type = 'int8'
-        self.prepare_inputs()
+        self.set_scales()
+        self.set_shifts()
-    def set_scale(self):
+        self.set_input_data_type()
-        self.scale_in = 0.1
+        self.prepare_input()
-        self.scale_out = 0.2
+        self.prepare_output()
+    def set_scales(self):
+        self.scale_in = 100.0
+        self.scale_out = 120.0
+    def set_shifts(self):
+        self.shift_in = 0.0
+        self.shift_out = 0.0
+    def set_input_data_type(self):
+        pass
    def test_check_output(self):
        variables = {
@@ -119,12 +302,16 @@ class TestReQuantizeOpReused(TestReQuantizeOp):
            for name in variables:
                block.create_var(
                    name=name, dtype="int8", shape=variables[name].shape)
-            requant_op = block.append_op(
+            block.append_op(
                type="requantize",
                inputs={'Input': block.var('input'), },
                outputs={"Output": block.var('output')},
-                attrs={'Scale_in': self.scale_in,
+                attrs={
-                       'Scale_out': self.scale_out})
+                    'Scale_in': self.scale_in,
+                    'Scale_out': self.scale_out,
+                    'Shift_in': self.shift_in,
+                    'Shift_out': self.shift_out
+                })
            place = core.CPUPlace()
            exe = fluid.Executor(place)
            for i in range(2):
@@ -137,5 +324,17 @@ class TestReQuantizeOpReused(TestReQuantizeOp):
                    variables['output'], out[0], atol=1e-4), 'output')
+# ---------------test reused requantize op, with shift----------------------
+class TestReQuantizeOpReused_WithShift(TestReQuantizeOpReused):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
+    def set_shifts(self):
+        self.shift_in = 128
+        self.shift_out = 60
 if __name__ == '__main__':
    unittest.main()