diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc
index a91e0f520e93c01bc5af09b691af2d5a6deda9f2..4b855fb6470353b5fa11a939a7f5bea39ea9c3a6 100644
--- a/paddle/fluid/operators/fake_quantize_op.cc
+++ b/paddle/fluid/operators/fake_quantize_op.cc
@@ -18,6 +18,52 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
+class FakeQuantizeAbsMaxOp : public framework::OperatorWithKernel {
+ public:
+  FakeQuantizeAbsMaxOp(const std::string &type,
+                       const framework::VariableNameMap &inputs,
+                       const framework::VariableNameMap &outputs,
+                       const framework::AttributeMap &attrs)
+      : OperatorWithKernel(type, inputs, outputs, attrs) {}
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of FakeQuantizeOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of FakeQuantizeOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("OutScale"),
+                   "Output(Scale) of FakeQuantizeOp should not be null.");
+    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
+    ctx->SetOutputDim("OutScale", {1});
+    ctx->ShareLoD("X", /*->*/ "Out");
+  }
+};
+
+class FakeQuantizeAbsMaxOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X", "(Tensor) Input is float data type.");
+    AddOutput("Out",
+              "(Tensor) Output of quantized low level tensor, "
+              "but also saved as float data type.");
+    AddOutput("OutScale", "(Tensor) Current scale");
+    AddAttr<int>("bit_length", "(int, default 8)")
+        .SetDefault(8)
+        .AddCustomChecker([](const int &bit_length) {
+          PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16,
+                         "'bit_length' should be between 1 and 16.");
+        });
+    AddComment(R"DOC(
+FakeQuantize operator
+
+$$scale = max(abs(X))$$ 
+$$range = 2^{bit_length - 1} - 1$$
+$$Out = round(X/scale * range)$$
+
+)DOC");
+  }
+};
+
 class FakeQuantizeOp : public framework::OperatorWithKernel {
  public:
   FakeQuantizeOp(const std::string &type,
diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h
index 80f71d85dde39f773cc489fb79effcc775c5010a..ce92f48ad3093e1526c07bf3067bd6000795e074 100644
--- a/paddle/fluid/operators/fake_quantize_op.h
+++ b/paddle/fluid/operators/fake_quantize_op.h
@@ -24,6 +24,131 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
+template <typename DeviceContext, typename T>
+class FakeQuantizeAbsMaxKernel : public framework::OpKernel<T> {
+ public:
+  T FindAbsMax(framework::Tensor* in, int n) const {
+    T* p = in->mutable_data<T>(platform::CPUPlace());
+    T abs_max = (T)0.00000001;
+    for (int i = 0; i < n; i++) {
+      T tmp = fabs(p[i]);
+      if (tmp > abs_max) abs_max = tmp;
+    }
+    return T(abs_max);
+  }
+  T FindRangeAbsMax(framework::Tensor* scale_list, framework::Tensor* out_scale,
+                    const T& cur_scale, int window_size,
+                    int current_iter) const {
+    T* sl = scale_list->mutable_data<T>(platform::CPUPlace());
+    T remove_tmp = sl[current_iter];
+    sl[current_iter] = cur_scale;
+    T& max_scale = out_scale->mutable_data<T>(platform::CPUPlace())[0];
+    if (max_scale < cur_scale) {
+      max_scale = cur_scale;
+    } else if (fabs(remove_tmp - max_scale) < 1e-6) {
+      int size = (current_iter > window_size) ? window_size : current_iter;
+      max_scale = T(FindAbsMax(scale_list, size));
+    }
+    return max_scale;
+  }
+
+  T FindMovingAverageAbsMmax(framework::Tensor* in_scale,
+                             framework::Tensor* out_scale,
+                             const T& cur_scale) const {
+    T* ins = in_scale->mutable_data<T>(platform::CPUPlace());
+    T* outs = out_scale->mutable_data<T>(platform::CPUPlace());
+    outs[0] = 0.9 * cur_scale + 0.1 * ins[0];
+    return T(outs[0]);
+  }
+
+  virtual void Compute(const framework::ExecutionContext& context) const {
+    auto* tensor = context.Output<framework::Tensor>("Out");
+    auto* in = context.Input<framework::Tensor>("X");
+    const bool is_test = context.Attr<bool>("is_test");
+    tensor->mutable_data<T>(in->place());
+
+    auto* oms_tensor = context.Output<framework::Tensor>("OutMovingScale");
+    oms_tensor->mutable_data<T>(in->place());
+
+    auto quantize_type =
+        static_cast<std::string>(context.Attr<std::string>("quantize_type"));
+    if (quantize_type == std::string("range_abs_max")) {
+      auto* oss_tensor = context.Output<framework::Tensor>("OutScales");
+      oss_tensor->mutable_data<T>(
+          context.Input<framework::Tensor>("InScales")->place());
+      auto* oci_tensor = context.Output<framework::Tensor>("OutCurrentIter");
+      oci_tensor->mutable_data<T>(
+          context.Input<framework::Tensor>("InCurrentIter")->place());
+    }
+
+    T scale = static_cast<T>(1);
+    int window_size = context.Attr<int>("window_size");
+    int bit_length = context.Attr<int>("bit_length");
+    int bin_cnt = std::pow(2, bit_length - 1) - 1;
+
+    auto& dev =
+        *context.template device_context<DeviceContext>().eigen_device();
+    auto raw_in = framework::EigenVector<T>::Flatten(*in);
+    if (quantize_type == std::string("abs_max")) {
+      auto* saving_scale = context.Output<framework::Tensor>("OutMovingScale");
+      auto scale_out = framework::EigenVector<T>::Flatten(*saving_scale);
+      scale_out.device(dev) = raw_in.abs().maximum();
+      scale = scale_out(0);
+
+      auto& device_ctx = context.template device_context<DeviceContext>();
+      auto* scale_list = context.Output<framework::Tensor>("OutScales");
+      math::SetConstant<DeviceContext, T> scalar;
+      scale_list->mutable_data<T>(context.GetPlace());
+      scalar(device_ctx, scale_list, static_cast<T>(0));
+      auto* iter = context.Output<framework::Tensor>("OutCurrentIter");
+      iter->mutable_data<T>(context.GetPlace());
+      scalar(device_ctx, iter, static_cast<T>(0));
+    } else if (quantize_type == std::string("range_abs_max")) {
+      auto* moving_scale = context.Input<framework::Tensor>("InMovingScale");
+      if (is_test) {
+        scale = moving_scale->data<T>()[0];
+      } else {
+        auto* it = context.Input<framework::Tensor>("InCurrentIter");
+        auto* iter = context.Output<framework::Tensor>("OutCurrentIter");
+        const int* last_iter = it->data<int>();
+        int* current_iter = iter->mutable_data<int>(platform::CPUPlace());
+        auto* scale_list = context.Output<framework::Tensor>("OutScales");
+        auto* saving_scale =
+            context.Output<framework::Tensor>("OutMovingScale");
+        auto scale_out = framework::EigenVector<T>::Flatten(*saving_scale);
+        scale_out.device(dev) = raw_in.abs().maximum();
+        scale = saving_scale->mutable_data<T>(platform::CPUPlace())[0];
+        scale = FindRangeAbsMax(scale_list, saving_scale, scale, window_size,
+                                current_iter[0]);
+        saving_scale->mutable_data<T>(platform::CPUPlace())[0] = scale;
+        (*current_iter) = (*last_iter) + 1;
+      }
+    } else if (quantize_type == std::string("moving_average_abs_max")) {
+      auto* moving_scale = context.Input<framework::Tensor>("InMovingScale");
+      if (is_test) {
+        scale = moving_scale->data<T>()[0];
+      } else {
+        auto* saving_scale =
+            context.Output<framework::Tensor>("OutMovingScale");
+        auto scale_out = framework::EigenVector<T>::Flatten(*saving_scale);
+        scale_out.device(dev) = raw_in.abs().maximum();
+        scale = saving_scale->mutable_data<T>(platform::CPUPlace())[0];
+        scale = FindMovingAverageAbsMmax(
+            const_cast<framework::Tensor*>(moving_scale), saving_scale, scale);
+        saving_scale->mutable_data<T>(platform::CPUPlace())[0] = scale;
+      }
+    }
+
+    Transform<DeviceContext> trans;
+    trans(context.template device_context<DeviceContext>(), in->data<T>(),
+          in->data<T>() + in->numel(), tensor->mutable_data<T>(in->place()),
+          ClipFunctor<T>(-scale, scale));
+    auto eigen_out = framework::EigenVector<T>::Flatten(*tensor);
+    auto eigen_in = framework::EigenVector<T>::Flatten(*tensor);
+    eigen_out.device(dev) = (bin_cnt / scale * eigen_in).round();
+  }
+};
+
 using platform::Transform;
 
 template <typename DeviceContext, typename T>