diff --git a/paddle/fluid/operators/dequantize_op.cc b/paddle/fluid/operators/dequantize_op.cc
index 0ed3293418fb124873b422036cb6c946823a83bf..8c2aeb1f8e64a13ae4369fab7c37fe055e303b2e 100644
--- a/paddle/fluid/operators/dequantize_op.cc
+++ b/paddle/fluid/operators/dequantize_op.cc
@@ -31,9 +31,10 @@ framework::OpKernelType DeQuantOp::GetExpectedKernelType(
 }
 
 void DeQuantOpMaker::Make() {
-  AddInput("Input", "input data");
-  AddOutput("Output", "output data");
-  AddAttr<float>("Scale", "scale data").SetDefault({1.0f});
+  AddInput("Input", "Input data");
+  AddOutput("Output", "Output data");
+  AddAttr<float>("Scale", "Scale data").SetDefault({1.0f});
+  AddAttr<float>("Shift", "Shift data").SetDefault({0.0f});
   AddComment(R"DOC(This op will dequantize data from INT8 to FP32)DOC");
 }
 
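For reference, the dequantization math this PR implements: with a nonzero `Shift`, `fp32 = (int - shift) / scale`, which the kernel below rewrites as one multiply plus one add, `int * (1/scale) + (-shift/scale)` — exactly the `reorder_scale` and `reorder_shift` it computes. A minimal numpy sketch of that math (the helper name `dequantize_ref` is illustrative, not part of Paddle):

```python
import numpy as np

def dequantize_ref(q, scale, shift=0.0):
    """Reference for the shifted dequantization the kernel implements:
    fp32 = (q - shift) / scale, computed as q * (1/scale) + (-shift/scale)."""
    reorder_scale = 1.0 / scale     # output scale passed to the reorder
    reorder_shift = -shift / scale  # constant pre-filled into the output
    return q.astype(np.float32) * reorder_scale + reorder_shift

# e.g. uint8 input quantized with scale=255, shift=128 maps back to ~[-0.5, 0.5)
q = np.array([0, 128, 255], dtype=np.uint8)
print(dequantize_ref(q, scale=255.0, shift=128.0))  # ~[-0.502, 0.0, 0.498]
```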
diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
index 70d4c34d9c5c4d28e2705c85f56bc65f90fbb3cf..e036fd9aba04b214c22f2f179de5ba5eb5dd277d 100644
--- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_layout_transform.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/dequantize_op.h"
+#include "paddle/fluid/platform/errors.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
 
@@ -37,14 +38,29 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* input = ctx.Input<Tensor>("Input");
     auto scale_data = ctx.Attr<float>("Scale");
+    auto scale_shift = ctx.Attr<float>("Shift");
+    bool with_shift = scale_shift != 0.0f;
     auto* output = ctx.Output<Tensor>("Output");
+
+    PADDLE_ENFORCE_NE(scale_data, 0.0f,
+                      platform::errors::InvalidArgument(
+                          "Dequantization scale cannot be 0.0"));
+    PADDLE_ENFORCE_GE(scale_shift, 0,
+                      platform::errors::Unimplemented(
+                          "Dequantization shift must be nonnegative."));
+    PADDLE_ENFORCE_LE(
+        scale_shift, 255,
+        platform::errors::Unimplemented(
+            "Dequantization shift must be less than or equal to 255."));
+
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
 
     const T* input_data = input->data<T>();
     float* output_data = output->mutable_data<float>(ctx.GetPlace());
-    std::vector<float> reorder_scale = {1.0f / scale_data};
+
+    float reorder_shift = -scale_shift / scale_data;
 
     auto src_tz = paddle::framework::vectorize(input->dims());
     auto dst_tz = paddle::framework::vectorize(output->dims());
@@ -65,7 +81,15 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
     if (reorder_p == nullptr) {
       mkldnn::primitive_attr attri;
       int mask = 0;
-      attri.set_output_scales(mask, reorder_scale);
+      float reorder_scale = 1. / scale_data;
+      attri.set_output_scales(mask, {reorder_scale});
+
+      if (with_shift) {
+        mkldnn::post_ops post_operations;
+        post_operations.append_sum();
+        attri.set_post_ops(post_operations);
+        std::fill(output_data, output_data + output->numel(), reorder_shift);
+      }
 
       auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
       src_memory = std::make_shared<mkldnn::memory>(
@@ -92,6 +116,8 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
       dst_memory = std::static_pointer_cast<mkldnn::memory>(
           dev_ctx.GetBlob(key_dst_mem));
+      if (with_shift)
+        std::fill(output_data, output_data + output->numel(), reorder_shift);
       dst_memory->set_data_handle(output->mutable_data<float>(ctx.GetPlace()));
     }
 
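The trick all three kernels use to apply the shift is worth spelling out. A oneDNN reorder with `set_output_scales` computes `dst = scale * src`; adding a `sum` post-op changes that to `dst = scale * src + dst`. Pre-filling the destination buffer with a constant (the `std::fill`/`std::memset` calls, repeated on the cached-primitive path because the output buffer is fresh on every call) therefore adds the shift at no extra primitive cost. A numpy emulation of that behavior (sketch only; `reorder_with_sum_postop` is not a real API):

```python
import numpy as np

def reorder_with_sum_postop(src, output_scale, dst_prefill):
    """Emulates a oneDNN reorder with output_scales plus a sum post-op:
    dst = output_scale * src + dst, where dst was pre-filled with a constant
    (this is what the kernels' std::fill/std::memset calls prepare)."""
    dst = np.full(src.shape, dst_prefill, dtype=np.float32)  # the pre-fill
    return output_scale * src.astype(np.float32) + dst       # the reorder

# dequantize u8 with scale=255, shift=128: prefill dst with -128/255
src = np.array([0, 128, 255], dtype=np.uint8)
print(reorder_with_sum_postop(src, 1.0 / 255.0, -128.0 / 255.0))
# -> approximately [-0.502, 0.0, 0.498]
```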
diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
index a6c8f8656a4e252f1a1eedb6d67ca322f0747a66..e5dedd403f39f18ae7002296d589947b887d1430 100644
--- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
@@ -36,7 +36,21 @@ class QuantOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* input = ctx.Input<Tensor>("Input");
     auto scale_data = ctx.Attr<float>("Scale");
+    auto scale_shift = ctx.Attr<float>("Shift");
+    bool with_shift = scale_shift != 0.0f;
     auto* output = ctx.Output<Tensor>("Output");
+
+    PADDLE_ENFORCE_NE(
+        scale_data, 0.0f,
+        platform::errors::InvalidArgument("Quantization scale cannot be 0.0"));
+    PADDLE_ENFORCE_GE(scale_shift, 0,
+                      platform::errors::Unimplemented(
+                          "Quantization shift must be nonnegative."));
+    PADDLE_ENFORCE_LE(
+        scale_shift, 255,
+        platform::errors::Unimplemented(
+            "Quantization shift must be less than or equal to 255."));
+
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
 
@@ -47,11 +61,12 @@ class QuantOpKernel : public framework::OpKernel<T> {
 
     const T* input_data = input->data<T>();
 
-    bool is_negative = ctx.Attr<bool>("is_negative_input");
+    bool is_negative_input = ctx.Attr<bool>("is_negative_input");
     bool bfloat16 = ctx.Attr<bool>("bfloat16");
-    std::string key =
-        platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_data,
-                            is_negative, ctx.OutputName("Output"));
+
+    std::string key = platform::CreateKey(
+        platform::ThreadIDasStr(), src_tz, scale_data, scale_shift,
+        is_negative_input, ctx.OutputName("Output"));
     const std::string key_prim = key + "@r";
     const std::string key_src_mem = key + "@s";
     const std::string key_dst_mem = key + "@d";
@@ -69,6 +84,15 @@ class QuantOpKernel : public framework::OpKernel<T> {
       int mask = 0;
       attri.set_output_scales(mask, {scale_data});
 
+      if (with_shift) {
+        mkldnn::post_ops post_operations;
+        post_operations.append_sum();
+        attri.set_post_ops(post_operations);
+        uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+        // memset casts scale_shift to unsigned char (uint8_t) internally
+        std::memset(output_data, scale_shift, output->numel());
+      }
+
       auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
                                             input->format());
       src_memory = std::make_shared<mkldnn::memory>(
@@ -78,7 +102,7 @@ class QuantOpKernel : public framework::OpKernel<T> {
       if (bfloat16) {
         platform::SetDstMemoryQuantized<paddle::platform::bfloat16>(
             ctx, output, dst_tz, engine, dst_md, dst_memory, out_format);
-      } else if (is_negative) {
+      } else if (is_negative_input && !with_shift) {
         platform::SetDstMemoryQuantized<int8_t>(ctx, output, dst_tz, engine,
                                                 dst_md, dst_memory, out_format);
       } else {
@@ -104,10 +128,13 @@ class QuantOpKernel : public framework::OpKernel<T> {
       if (bfloat16) {
         dst_memory->set_data_handle(
             output->mutable_data<paddle::platform::bfloat16>(place));
-      } else if (is_negative) {
-        dst_memory->set_data_handle(output->mutable_data<int8_t>(place));
+      } else if (with_shift || !is_negative_input) {
+        uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+        if (with_shift) std::memset(output_data, scale_shift, output->numel());
+        dst_memory->set_data_handle(output_data);
       } else {
-        dst_memory->set_data_handle(output->mutable_data<uint8_t>(place));
+        dst_memory->set_data_handle(
+            output->mutable_data<int8_t>(ctx.GetPlace()));
       }
     }
 
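The quantization side of the same math: `q = round(x * scale + shift)`, with a nonzero shift forcing unsigned output — hence the `is_negative_input && !with_shift` condition above, which selects the signed `int8` path only when no shift is applied. A hedged numpy sketch (`quantize_ref` is illustrative, not Paddle API):

```python
import numpy as np

def quantize_ref(x, scale, shift=0.0, is_negative_input=False):
    """Reference for the quantize kernel: q = round(x * scale + shift).
    A nonzero shift forces unsigned int8 output, mirroring the
    'is_negative_input && !with_shift' branch in the C++ kernel."""
    q = np.rint(x * scale + shift)
    if is_negative_input and shift == 0.0:
        return q.astype(np.int8)    # signed path, only without shift
    return q.astype(np.uint8)       # unsigned path (always used with shift)

x = np.array([-1.0, 0.0, 0.99], dtype=np.float32)
print(quantize_ref(x, scale=127.0, shift=128.0))  # [1, 128, 254]
```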
diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
index 5ad5ad9450503111882a9b3bc2cd9161f74d500e..4666e5b74a5cc252b078a434c58f9ac6f3f2891a 100644
--- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
@@ -26,20 +26,45 @@
 using dnnl::reorder;
 using platform::to_void_cast;
 using Tensor = framework::Tensor;
 
+namespace {
+
+inline uint8_t clip_to_uint8(float x) {
+  return std::max(0L, std::min(255L, std::lround(x)));
+}
+
+}  // namespace
+
 template <typename T>
 class ReQuantOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* input = ctx.Input<Tensor>("Input");
     auto scale_in = ctx.Attr<float>("Scale_in");
+    auto shift_in = ctx.Attr<float>("Shift_in");
     auto scale_out = ctx.Attr<float>("Scale_out");
+    auto shift_out = ctx.Attr<float>("Shift_out");
+    bool with_shift = shift_in != 0.0f || shift_out != 0.0f;
     auto* output = ctx.Output<Tensor>("Output");
+
+    PADDLE_ENFORCE_NE(scale_in, 0.0f, platform::errors::InvalidArgument(
+                                          "Scale of input cannot be 0.0"));
+    PADDLE_ENFORCE_NE(scale_out, 0.0f, platform::errors::InvalidArgument(
+                                           "Scale of output cannot be 0.0"));
+    if (shift_in != 0.0f) {
+      PADDLE_ENFORCE_EQ(
+          input->type(), framework::proto::VarType::UINT8,
+          platform::errors::Unimplemented("Requantize does not support "
+                                          "nonzero shift for signed input."));
+    }
+
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
 
     auto src_tz = paddle::framework::vectorize(input->dims());
 
+    float reorder_scale = scale_out / scale_in;
+
     std::string key = platform::CreateKey(platform::ThreadIDasStr(), src_tz,
                                           scale_in, scale_out,
                                           ctx.OutputName("Output"));
@@ -53,28 +78,37 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
     reorder_p = std::static_pointer_cast<reorder>(dev_ctx.GetBlob(key_prim));
 
     const T* input_data = input->data<T>();
-    T* output_data = output->mutable_data<T>(ctx.GetPlace());
 
     if (reorder_p == nullptr) {
-      dnnl::primitive_attr attri;
-      int mask = 0;
-      float scale_shift = scale_out / scale_in;
-      attri.set_output_scales(mask, {scale_shift});
-
-      auto dst_tz = paddle::framework::vectorize(output->dims());
-      dnnl::memory::data_type src_dt =
-          paddle::framework::ToMKLDNNDataType(input->type());
-      dnnl::memory::data_type dst_dt = src_dt;
+      auto dst_tz = framework::vectorize(output->dims());
+      auto src_dt = framework::ToMKLDNNDataType(input->type());
+      auto dst_dt = with_shift ? framework::MKLDNNDataType::u8 : src_dt;
 
       auto src_md =
           platform::MKLDNNMemDesc({src_tz}, src_dt, MKLDNNMemoryFormat::nhwc);
       src_memory = std::make_shared<dnnl::memory>(src_md, engine,
                                                   to_void_cast<T>(input_data));
-
       auto dst_md =
           platform::MKLDNNMemDesc({dst_tz}, dst_dt, MKLDNNMemoryFormat::nhwc);
-      dst_memory = std::make_shared<dnnl::memory>(dst_md, engine,
-                                                  to_void_cast<T>(output_data));
+
+      dnnl::primitive_attr attri;
+      int mask = 0;
+      attri.set_output_scales(mask, {reorder_scale});
+      if (with_shift) {
+        mkldnn::post_ops post_operations;
+        post_operations.append_sum();
+        attri.set_post_ops(post_operations);
+        uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+        uint8_t reorder_shift =
+            clip_to_uint8(shift_out - reorder_scale * shift_in);
+        std::memset(output_data, reorder_shift, output->numel());
+        dst_memory = std::make_shared<dnnl::memory>(
+            dst_md, engine, to_void_cast<uint8_t>(output_data));
+      } else {
+        T* output_data = output->mutable_data<T>(ctx.GetPlace());
+        dst_memory = std::make_shared<dnnl::memory>(
+            dst_md, engine, to_void_cast<T>(output_data));
+      }
 
       auto reorder_pd =
           reorder::primitive_desc(*src_memory, *dst_memory, attri);
@@ -90,7 +124,17 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
       dst_memory =
           std::static_pointer_cast<dnnl::memory>(dev_ctx.GetBlob(key_dst_mem));
-      dst_memory->set_data_handle(output_data);
+      if (with_shift) {
+        uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+        uint8_t reorder_shift =
+            clip_to_uint8(shift_out - reorder_scale * shift_in);
+        std::memset(output_data, reorder_shift, output->numel());
+        dst_memory->set_data_handle(output_data);
+      } else {
+        T* output_data = output->mutable_data<T>(ctx.GetPlace());
+        dst_memory->set_data_handle(output_data);
+      }
     }
 
     dnnl::stream astream(engine);
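The requantize shift combination follows by substituting one affine form into the other: if `x = (q_in - shift_in) / scale_in` and `q_out = x * scale_out + shift_out`, then `q_out = q_in * (scale_out / scale_in) + (shift_out - (scale_out / scale_in) * shift_in)` — one reorder scale plus one combined shift, clipped to `[0, 255]` exactly as `clip_to_uint8` does. A numpy sketch of that derivation (`requantize_ref` is illustrative):

```python
import numpy as np

def requantize_ref(q_in, scale_in, scale_out, shift_in=0.0, shift_out=0.0):
    """Reference for the requantize kernel: substituting
    x = (q_in - shift_in) / scale_in into q_out = x * scale_out + shift_out
    yields a single reorder scale and one combined shift."""
    reorder_scale = scale_out / scale_in
    reorder_shift = np.clip(  # mirrors clip_to_uint8 in the C++ kernel
        np.rint(shift_out - reorder_scale * shift_in), 0, 255)
    return np.clip(np.rint(q_in * reorder_scale + reorder_shift),
                   0, 255).astype(np.uint8)

q = np.array([0, 128, 255], dtype=np.uint8)
print(requantize_ref(q, 127.0, 60.0, shift_in=128.0, shift_out=128.0))
# -> [68, 128, 188]
```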
diff --git a/paddle/fluid/operators/quantize_op.cc b/paddle/fluid/operators/quantize_op.cc
index 602fdc6ff67787ace488379a2730dad4b8ffe1b1..ee5829319d2a621c50fc52fa9be7f8993dccadb6 100644
--- a/paddle/fluid/operators/quantize_op.cc
+++ b/paddle/fluid/operators/quantize_op.cc
@@ -31,12 +31,16 @@ framework::OpKernelType QuantOp::GetExpectedKernelType(
 }
 
 void QuantOpMaker::Make() {
-  AddInput("Input", "input data");
-  AddOutput("Output", "output data");
+  AddInput("Input", "Input data");
+  AddOutput("Output", "Output data");
   AddAttr<bool>("is_negative_input",
                 "(bool, default false) Only used in mkldnn INT8 kernel")
       .SetDefault(false);
-  AddAttr<float>("Scale", "scale data").SetDefault({1.0f});
+  AddAttr<float>("Scale", "Scale data").SetDefault({1.0f});
+  AddAttr<float>(
+      "Shift",
+      "Shift data. When Shift is non-zero, data is quantized to unsigned int8.")
+      .SetDefault({0.0f});
   AddAttr<std::string>("output_format",
                        "Convert format to NHWC or NCHW during quantization.")
       .SetDefault("NHWC");
diff --git a/paddle/fluid/operators/requantize_op.cc b/paddle/fluid/operators/requantize_op.cc
index c17b6ef8842ad1682619ce6bf6c1a4a17fcc67b4..ea3058c5ae4a1852b79abebeef9c088719330023 100644
--- a/paddle/fluid/operators/requantize_op.cc
+++ b/paddle/fluid/operators/requantize_op.cc
@@ -31,10 +31,12 @@ framework::OpKernelType ReQuantOp::GetExpectedKernelType(
 }
 
 void ReQuantOpMaker::Make() {
-  AddInput("Input", "input data");
-  AddOutput("Output", "output data");
-  AddAttr<float>("Scale_in", "scale in data").SetDefault({1.0f});
-  AddAttr<float>("Scale_out", "scale out data").SetDefault({1.0f});
+  AddInput("Input", "Input data");
+  AddOutput("Output", "Output data");
+  AddAttr<float>("Scale_in", "Scale in data").SetDefault({1.0f});
+  AddAttr<float>("Scale_out", "Scale out data").SetDefault({1.0f});
+  AddAttr<float>("Shift_in", "Shift in data").SetDefault({0.0f});
+  AddAttr<float>("Shift_out", "Shift out data").SetDefault({0.0f});
   AddComment(
       R"DOC(This op will re-quantize data from INT8 with scale_in to INT8 with scale_out)DOC");
 }
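Before the unit tests, a quick end-to-end sanity check of how the three attribute sets compose (pure numpy sketch; the scales and shifts here are arbitrary example values, and the real MKL-DNN kernels are exercised by the tests below):

```python
import numpy as np

# fp32 -> quantize (u8) -> requantize (u8) -> dequantize (fp32)
x = np.array([-1.0, -0.5, 0.0, 0.5, 1.0], dtype=np.float32)

# quantize with Scale=127, Shift=128
q = np.rint(x * 127.0 + 128.0).astype(np.uint8)

# requantize to Scale_out=60, Shift_out=60: one ratio, one combined shift
ratio = 60.0 / 127.0
shift = np.clip(np.rint(60.0 - ratio * 128.0), 0, 255)
r = np.clip(np.rint(q * ratio + shift), 0, 255).astype(np.uint8)

# dequantize with Scale=60, Shift=60: (r - shift) / scale
print((r - 60.0) / 60.0)  # ~[-1.0, -0.5, 0.0, 0.52, 1.0], within one step of x
```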
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py
index 70c76f1fb7186fcc983c0378af657d4aae2d2b32..285b6d21fcf9f6a9f7e258e65132c95c3e5c1fc2 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py
@@ -22,40 +22,69 @@ from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16
 class TestDeQuantizeOp(OpTest):
     def setUp(self):
         self.op_type = 'dequantize'
-        self.scale = 2.0
-        self.input_size = [1, 1, 5, 5]  #Naive nChw16c
+        self.scale = 127.0
+        self.shift = 0.0
+        self.input_size = [1, 1, 5, 5]  # Naive nChw16c
         self.data_type = 'int8'
         self.set_scale()
+        self.set_shift()
         self.set_data_type()
+        self.set_input_size()
 
+        if self.data_type == 'uint16':
+            self.prepare_input_output_bf16()
+        else:
+            self.prepare_input_int8()
+            self.prepare_output_int8()
+
+    def prepare_input_output_bf16(self):
+        output = np.random.random(self.input_size).astype(np.float32)
+        input = convert_float_to_uint16(output)
+        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
+        self.outputs = {'Output': output}
+
+    def prepare_input_int8(self):
         if self.data_type == 'int8':
-            input = (np.random.randint(0, 100, self.input_size) - 50
-                     ).astype(self.data_type)
-            output = (input * (1 / self.scale)).astype('float')
-        elif self.data_type == 'uint16':
-            output = np.random.random(self.input_size).astype(np.float32)
-            input = convert_float_to_uint16(output)
+            # input data values are integers from interval [-128, 128)
+            self.input = (np.random.randint(0, 256, self.input_size) - 128
+                          ).astype(self.data_type)
         else:
-            input = (np.random.randint(0, 100,
-                                       self.input_size)).astype(self.data_type)
-            output = (input * (1 / self.scale)).astype('float')
+            # input data values are integers from interval [0, 256)
+            self.input = (np.random.randint(
+                0, 256, self.input_size)).astype(self.data_type)
 
-        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
+        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)}
+        self.attrs = {'Scale': self.scale, 'Shift': self.shift}
 
+    def prepare_output_int8(self):
+        output = (self.input / self.scale -
+                  (self.shift / self.scale)).astype('float')
         self.outputs = {'Output': output}
-        self.attrs = {'Scale': self.scale, }
 
     def test_check_output(self):
         # TODO(wangzhongpu): support mkldnn op in dygraph mode
         self.check_output(check_dygraph=False)
 
+    def check_raise_error(self, msg):
+        try:
+            self.check_output()
+        except Exception as e:
+            if msg in str(e):
+                raise AttributeError
+            else:
+                print(e)
+
     def set_scale(self):
         pass
 
+    def set_shift(self):
+        pass
+
     def set_data_type(OpTest):
         pass
 
+    def set_input_size(self):
+        pass
+
 
 class TestDeQuantizeOp1(TestDeQuantizeOp):
     def set_scale(self):
@@ -81,5 +110,95 @@ class TestDeQuantizeOpBf16(TestDeQuantizeOp):
         self.data_type = 'uint16'
 
 
+class TestDeQuantizeOp_ZeroScale(TestDeQuantizeOp):
+    def set_scale(self):
+        self.scale = 0.0
+
+    def prepare_output_int8(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Dequantization scale cannot be 0.0')
+
+
+# 2-dim input
+# P - positive input, with shift
+class TestDeQuantizeOpShift_2_P(TestDeQuantizeOp):
+    def set_data_type(self):
+        self.data_type = 'uint8'
+
+    def set_scale(self):
+        self.scale = 255.0
+
+    def set_shift(self):
+        self.shift = 128.0
+
+    def set_input_size(self):
+        self.input_size = [2, 3]
+
+
+# 2-dim input
+# N - negative input, with shift
+class TestDeQuantizeOpShift_2_N(TestDeQuantizeOpShift_2_P):
+    def set_data_type(self):
+        self.data_type = 'int8'
+
+    def set_scale(self):
+        self.scale = 127.0
+
+    def set_shift(self):
+        self.shift = 10.0
+
+    def set_input_size(self):
+        self.input_size = [2, 3]
+
+
+# 3-dim input
+class TestDeQuantizeOpShift_3_P(TestDeQuantizeOpShift_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+
+
+class TestDeQuantizeOpShift_3_N(TestDeQuantizeOpShift_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+
+
+# 4-dim input
+class TestDeQuantizeOpShift_4_P(TestDeQuantizeOpShift_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+
+
+class TestDeQuantizeOpShift_4_N(TestDeQuantizeOpShift_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+
+
+class TestDeQuantizeOp_NegativeShift(TestDeQuantizeOp):
+    def set_shift(self):
+        self.shift = -10.0
+
+    def prepare_output_int8(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Dequantization shift must be nonnegative.')
+
+
+class TestDeQuantizeOp_TooBigShift(TestDeQuantizeOp_NegativeShift):
+    def set_shift(self):
+        self.shift = 300.0
+
+    def test_check_output(self):
+        self.assertRaises(
+            AttributeError, self.check_raise_error,
+            'Dequantization shift must be less than or equal to 255.')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py
index 9f08fea778aaeaae7983af0f2ad8e5304d034382..a7acc5f3f9bf327146df804949e5428c999edd12 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py
@@ -22,44 +22,75 @@ from paddle.fluid.tests.unittests.op_test import OpTest
 class TestQuantizeOp(OpTest):
     def setUp(self):
         self.op_type = 'quantize'
-        self.scale = 2.0
-        self.input_size = [1, 1, 5, 5]  #Naive nChw16c
+        self.scale = 255.0
+        self.shift = 0.0
+        self.input_size = [1, 1, 5, 5]  # Naive nChw16c
         self.is_negative = False
+        self.output_format = 'NCHW'
         self.set_scale()
+        self.set_shift()
         self.set_is_negative()
+        self.set_input_size()
+        self.set_output_format()
+        self.prepare_input()
+        self.prepare_output()
 
+    def prepare_input(self):
         if self.is_negative:
-            input = (100 * np.random.random_sample(self.input_size) - 50
-                     ).astype('float32')
-            output = np.round(input * self.scale).astype('int8')
+            # input data values are from interval [-1.0, 1.0)
+            self.input = (2 * np.random.random_sample(self.input_size) - 1
+                          ).astype('float32')
         else:
-            input = (100 *
-                     np.random.random_sample(self.input_size)).astype('float32')
-            output = np.round(input * self.scale).astype('uint8')
-
-        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
-
-        self.outputs = {'Output': output}
+            # input data values are from interval [0.0, 1.0)
+            self.input = (
+                np.random.random_sample(self.input_size)).astype('float32')
 
+        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)}
         self.attrs = {
             'Scale': self.scale,
-            'is_negative_input': self.is_negative
+            'Shift': self.shift,
+            'is_negative_input': self.is_negative,
+            'output_format': self.output_format
         }
 
+    def prepare_output(self):
+        input_data_type = 'int8' if self.is_negative else 'uint8'
+        output = np.rint(self.input * self.scale + self.shift).astype(
+            input_data_type)
+        self.outputs = {'Output': output}
+
     def test_check_output(self):
         # TODO(wangzhongpu): support mkldnn op in dygraph mode
         self.check_output(check_dygraph=False)
 
+    def check_raise_error(self, msg):
+        try:
+            self.check_output()
+        except Exception as e:
+            if msg in str(e):
+                raise AttributeError
+            else:
+                print(e)
+
     def set_scale(self):
         pass
 
+    def set_shift(self):
+        pass
+
     def set_is_negative(self):
         pass
 
+    def set_input_size(self):
+        pass
+
+    def set_output_format(self):
+        pass
+
 
 class TestQuantizeOp1(TestQuantizeOp):
     def set_scale(self):
-        self.scale = 1.5
+        self.scale = 127.0
 
     def set_is_negative(self):
         self.is_nagative = True
@@ -67,11 +98,137 @@ class TestQuantizeOp1(TestQuantizeOp):
 
 class TestQuantizeOp2(TestQuantizeOp):
     def set_scale(self):
-        self.scale = 0.1
+        self.scale = 255.0
+
+    def set_is_negative(self):
+        self.is_negative = False
+
+
+class TestQuantizeOp_ZeroScale(TestQuantizeOp):
+    def set_scale(self):
+        self.scale = 0.0
+
+    def prepare_output(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Quantization scale cannot be 0.0')
+
+
+# 2-dim input
+# P - positive input
+class TestQuantizeOpShift_NCHW_2_P(TestQuantizeOp):
+    def set_output_format(self):
+        self.output_format = 'NCHW'
+
+    def set_is_negative(self):
+        self.is_negative = False
+
+    def set_scale(self):
+        self.scale = 255.0
+
+    def set_shift(self):
+        self.shift = 0.0
+
+    def set_input_size(self):
+        self.input_size = [2, 3]
+
+
+# 2-dim input
+# N - negative input
+class TestQuantizeOpShift_NCHW_2_N(TestQuantizeOpShift_NCHW_2_P):
+    def set_is_negative(self):
+        self.is_negative = True
+
+    def set_scale(self):
+        self.scale = 127.0
+
+    def set_shift(self):
+        self.shift = 128.0
+
+
+class TestQuantizeOpShift_NHWC_2_P(TestQuantizeOpShift_NCHW_2_P):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+class TestQuantizeOpShift_NHWC_2_N(TestQuantizeOpShift_NCHW_2_N):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+# 3-dim input
+class TestQuantizeOpShift_NCHW_3_P(TestQuantizeOpShift_NCHW_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+
+
+class TestQuantizeOpShift_NCHW_3_N(TestQuantizeOpShift_NCHW_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+
+
+class TestQuantizeOpShift_NHWC_3_P(TestQuantizeOpShift_NCHW_3_P):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+class TestQuantizeOpShift_NHWC_3_N(TestQuantizeOpShift_NCHW_3_N):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+# 4-dim input
+class TestQuantizeOpShift_NCHW_4_P(TestQuantizeOpShift_NCHW_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+
+
+class TestQuantizeOpShift_NCHW_4_N(TestQuantizeOpShift_NCHW_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+
+
+class TestQuantizeOpShift_NHWC_4_P(TestQuantizeOpShift_NCHW_4_P):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+class TestQuantizeOpShift_NHWC_4_N(TestQuantizeOpShift_NCHW_4_N):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+class TestQuantizeOp_NegativeShift(TestQuantizeOp):
     def set_is_negative(self):
         self.is_nagative = False
 
+    def set_scale(self):
+        self.scale = 100.0
+
+    def set_shift(self):
+        self.shift = -10.0
+
+    def prepare_output(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Quantization shift must be nonnegative.')
+
+
+class TestQuantizeOp_TooBigShift(TestQuantizeOp_NegativeShift):
+    def set_shift(self):
+        self.shift = 300.0
+
+    def test_check_output(self):
+        self.assertRaises(
+            AttributeError, self.check_raise_error,
+            'Quantization shift must be less than or equal to 255.')
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py
index 750e7f37df4bff34006f34ff52292bfc621a75fc..7babec667b8e29eb14bc5da9b8773081506ba613 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_requantize_mkldnn_op.py
@@ -25,88 +25,271 @@ from mkldnn_op_test import format_reorder
 
 class TestReQuantizeOp(OpTest):
     def setUp(self):
         self.op_type = 'requantize'
-        self.scale_in = 2.0
-        self.scale_out = 1.5
+        self.scale_in = 127.0
+        self.shift_in = 0.0
+        self.scale_out = 100.0
+        self.shift_out = 0.0
         self.input_size = [1, 1, 10, 10]
-        self.data_type = 'int8'
-        self.set_scale()
-        self.set_data_type()
-        self.prepare_inputs()
-
-    def prepare_inputs(self):
-        scale_shift = self.scale_out / self.scale_in
-
-        if self.data_type == 'int8':
-            self.input = (np.random.randint(0, 100, self.input_size) - 50
-                          ).astype(self.data_type)
-            output_tmp = np.round(self.input.astype('float32') *
-                                  scale_shift).astype('int8')
+        self.input_data_type = 'int8'
+        self.set_scales()
+        self.set_shifts()
+        self.set_input_data_type()
+        self.prepare_input()
+        self.prepare_output()
+
+    def prepare_input(self):
+        if self.input_data_type == 'int8':
+            # input data values are integers from interval [-128, 128)
+            self.input = (np.random.randint(0, 256, self.input_size) - 128
+                          ).astype(self.input_data_type)
         else:
+            # input data values are integers from interval [0, 256)
             self.input = (np.random.randint(
-                0, 100, self.input_size)).astype(self.data_type)
-            output_tmp = np.round(self.input.astype('float32') *
-                                  scale_shift).astype('uint8')
-
-        self.output = format_reorder(output_tmp, self.input_size)
+                0, 256, self.input_size)).astype(self.input_data_type)
 
         self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)}
+        self.attrs = {
+            'Scale_in': self.scale_in,
+            'Scale_out': self.scale_out,
+            'Shift_in': self.shift_in,
+            'Shift_out': self.shift_out
+        }
 
-        self.outputs = {'Output': self.output}
+    def prepare_output(self):
+        scale_ratio = self.scale_out / self.scale_in
+        with_shift = (self.shift_in != 0.0 or self.shift_out != 0.0)
+
+        if with_shift or self.input_data_type == 'uint8':
+            dst_type = 'uint8'
+            type_min = 0
+            type_max = 255
+            new_shift = np.clip(
+                np.rint(self.shift_out - scale_ratio * self.shift_in),
+                type_min, type_max)
+        else:
+            dst_type = 'int8'
+            type_min = -128
+            type_max = 127
+            new_shift = 0
 
-        self.attrs = {'Scale_in': self.scale_in, 'Scale_out': self.scale_out}
+        output_tmp = np.clip(
+            np.rint(self.input.astype('float32') * scale_ratio + new_shift),
+            type_min, type_max).astype(dst_type)
+
+        self.output = format_reorder(output_tmp, self.input_size)
+        self.outputs = {'Output': self.output}
 
     def test_check_output(self):
         # TODO(wangzhongpu): support mkldnn op in dygraph mode
+        self.assertTrue(
+            self.input_data_type == 'uint8' or self.shift_in == 0.0,
+            'Input data must be unsigned if it has nonzero shift.')
         self.check_output(check_dygraph=False)
 
-    def set_scale(self):
+    def check_raise_error(self, msg):
+        try:
+            self.check_output()
+        except Exception as e:
+            if msg in str(e):
+                raise AttributeError
+            else:
+                print(e)
+
+    def set_scales(self):
         pass
 
-    def set_data_type(OpTest):
+    def set_shifts(self):
         pass
 
+    def set_input_data_type(self):
+        pass
+
 
-#--------------------test requantize with s8 input--------------------
+# ---------------test requantize with s8 input, no shift--------------------
 
 
-class TestReQuantizeOp1(TestReQuantizeOp):
-    def set_scale(self):
-        self.scale_in = 1.5
-        self.scale_out = 1.5
+class TestReQuantizeOp_S8_SameScales(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 127.0
+        self.scale_out = 127.0
 
 
-class TestReQuantizeOp2(TestReQuantizeOp):
-    def set_scale(self):
-        self.scale_in = 0.1
-        self.scale_out = 0.2
+class TestReQuantizeOp_S8_DifferentScales_1(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 127.0
+        self.scale_out = 100.0
+
+
+class TestReQuantizeOp_S8_DifferentScales_2(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 100.0
+        self.scale_out = 127.0
+
+
+class TestReQuantizeOp_S8_ZeroInputScale(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 0.0
+        self.scale_out = 127.0
+
+    def prepare_output(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Scale of input cannot be 0.0')
+
+
+class TestReQuantizeOp_S8_ZeroOutputScale(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 127.0
+        self.scale_out = 0.0
+
+    def prepare_output(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Scale of output cannot be 0.0')
 
 
-#--------------------test requantize with u8 input--------------------
+# ---------------test requantize with u8 input, no shift--------------------
 
 
-class TestReQuantizeOp3(TestReQuantizeOp1):
-    def set_data_type(self):
-        self.data_type = 'uint8'
+class TestReQuantizeOp_U8_SameScales(TestReQuantizeOp_S8_SameScales):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
 
 
-class TestReQuantizeOp4(TestReQuantizeOp2):
-    def set_data_type(self):
-        self.data_type = 'uint8'
+class TestReQuantizeOp_U8_DifferentScales_1(
+        TestReQuantizeOp_S8_DifferentScales_1):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
+
+
+class TestReQuantizeOp_U8_DifferentScales_2(
+        TestReQuantizeOp_S8_DifferentScales_2):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
+
+
+# ---------------test requantize with s8 input, with shift------------------
+
+
+class TestReQuantizeOp_S8_WithShift(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 60.0
+        self.scale_out = 127.0
+
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+
+    def test_check_output(self):
+        self.assertRaises(
+            AttributeError, self.check_raise_error,
+            'Requantize does not support nonzero shift for signed input.')
+
+
+class TestReQuantizeOp_S8_WithOutputShift(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 127.0
+        self.scale_out = 60.0
+
+    def set_shifts(self):
+        self.shift_in = 0.0
+        self.shift_out = 120.0
+
+
+# ---------------test requantize with u8 input, with shift------------------
+
+
+class TestReQuantizeOp_U8_SameScales_SameShift(TestReQuantizeOp_U8_SameScales):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+
+
+class TestReQuantizeOp_U8_SameScales_DifferentShift_1(
+        TestReQuantizeOp_U8_SameScales):
+    def set_shifts(self):
+        self.shift_in = 60.0
+        self.shift_out = 128.0
+
+
+class TestReQuantizeOp_U8_SameScales_DifferentShift_2(
+        TestReQuantizeOp_U8_SameScales):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 60.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_1_SameShift(
+        TestReQuantizeOp_U8_DifferentScales_1):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_2_SameShift(
+        TestReQuantizeOp_U8_DifferentScales_2):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_1_DifferentShift_1(
+        TestReQuantizeOp_U8_DifferentScales_1):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 60.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_2_DifferentShift_1(
+        TestReQuantizeOp_U8_DifferentScales_2):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 60.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_1_DifferentShift_2(
+        TestReQuantizeOp_U8_DifferentScales_1):
+    def set_shifts(self):
+        self.shift_in = 60.0
+        self.shift_out = 128.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_2_DifferentShift_2(
+        TestReQuantizeOp_U8_DifferentScales_2):
+    def set_shifts(self):
+        self.shift_in = 60.0
+        self.shift_out = 128.0
 
 
-#-------------------test reused requantize op---------------------------
+# ---------------test reused requantize op, no shift------------------------
 
 
 class TestReQuantizeOpReused(TestReQuantizeOp):
     def setUp(self):
-        self.input_size = [1, 1, 10, 10]
-        self.data_type = 'int8'
-        self.set_scale()
-        self.prepare_inputs()
-
-    def set_scale(self):
-        self.scale_in = 0.1
-        self.scale_out = 0.2
+        self.input_size = [1, 1, 2, 2]
+        self.input_data_type = 'int8'
+        self.set_scales()
+        self.set_shifts()
+        self.set_input_data_type()
+        self.prepare_input()
+        self.prepare_output()
+
+    def set_scales(self):
+        self.scale_in = 100.0
+        self.scale_out = 120.0
+
+    def set_shifts(self):
+        self.shift_in = 0.0
+        self.shift_out = 0.0
+
+    def set_input_data_type(self):
+        pass
 
     def test_check_output(self):
         variables = {
@@ -119,12 +302,16 @@ class TestReQuantizeOpReused(TestReQuantizeOp):
         for name in variables:
             block.create_var(
                 name=name, dtype="int8", shape=variables[name].shape)
-        requant_op = block.append_op(
+        block.append_op(
            type="requantize",
            inputs={'Input': block.var('input'), },
            outputs={"Output": block.var('output')},
-            attrs={'Scale_in': self.scale_in,
-                   'Scale_out': self.scale_out})
+            attrs={
+                'Scale_in': self.scale_in,
+                'Scale_out': self.scale_out,
+                'Shift_in': self.shift_in,
+                'Shift_out': self.shift_out
+            })
         place = core.CPUPlace()
         exe = fluid.Executor(place)
         for i in range(2):
@@ -137,5 +324,17 @@ class TestReQuantizeOpReused(TestReQuantizeOp):
                 variables['output'], out[0], atol=1e-4), 'output')
 
 
+# ---------------test reused requantize op, with shift----------------------
+
+
+class TestReQuantizeOpReused_WithShift(TestReQuantizeOpReused):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
+
+    def set_shifts(self):
+        self.shift_in = 128
+        self.shift_out = 60
+
+
 if __name__ == '__main__':
     unittest.main()