diff --git a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
index 112623d23a65f2cd6e2747e24f3fb72c9d9b5cf3..7930923668c7d77ebf90e229e4c1032575fac710 100644
--- a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
@@ -93,7 +93,8 @@ class Quant2Int8MkldnnPass(object):
         graph = self._dequantize_weights(graph)
         graph = self._optimize_fp32_graph(graph)
         graph = self._compute_weight_scales(graph)
-        graph = self._update_relu_output_scales(graph)
+        # This function causes nondeterministic quantization behavior
+        # graph = self._update_relu_output_scales(graph)
         graph = self._propagate_scales(graph)
         graph = self._quantize_fp32_graph(graph)
         graph = self._final_optimizations(graph)
diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
index e55db665052cec0176e4e070f2bcb06190fabde7..03503111fca9a6e259aefe8657ac07a69e6bcaf1 100644
--- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
@@ -92,17 +92,14 @@ function(inference_quant2_int8_nlp_test target quant_model_dir fp32_model_dir da
                --ops_to_quantize ${ops_to_quantize})
 endfunction()
 
-function(inference_quant2_int8_lstm_model_test target fp32_model dataset_path)
+function(inference_quant2_int8_lstm_model_test target fp32_model quant_model dataset_path)
     py_test(${target} SRCS "${CMAKE_CURRENT_SOURCE_DIR}/quant2_int8_lstm_model.py"
-            ENVS FLAGS_OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
-                 OMP_NUM_THREADS=${CPU_NUM_THREADS_ON_CI}
-                 FLAGS_use_mkldnn=true
            ARGS --fp32_model ${fp32_model}
+                --quant_model ${quant_model}
                 --infer_data ${dataset_path}
-                --num_threads 4
+                --num_threads 1
                 --mkldnn_cache_capacity 100
                 --warmup_iter 100
-                --warmup_batch_size 1
                 --acc_diff_threshold 0.11)
 endfunction()
 
@@ -293,11 +290,10 @@ if(LINUX AND WITH_MKLDNN)
 
     # PTQ int8 lstm model
    set(LSTM_DATA_ARCHIVE "unittest_model_data/quant_lstm_input_data.tar.gz")
-    set(QUANT2_INT8_LSTM_SAVE_PATH "${QUANT_INSTALL_DIR}/lstm_quant2")
    download_quant_data(${QUANT2_INT8_LSTM_SAVE_PATH} ${LSTM_DATA_ARCHIVE} add84c754e9b792fea1fbd728d134ab7)
    set(QUANT2_FP32_LSTM_MODEL_ARCHIVE "lstm_fp32_model.tar.gz")
    download_lstm_model(${QUANT2_INT8_LSTM_SAVE_PATH} ${QUANT2_FP32_LSTM_MODEL_ARCHIVE} eecd9f44d69a84acc1cf2235c4b8b743)
-    inference_quant2_int8_lstm_model_test(test_quant2_int8_lstm_mkldnn ${QUANT2_INT8_LSTM_SAVE_PATH}/lstm_fp32_model ${QUANT2_INT8_LSTM_SAVE_PATH}/quant_lstm_input_data)
+    inference_quant2_int8_lstm_model_test(test_quant2_int8_lstm_mkldnn ${QUANT2_INT8_LSTM_SAVE_PATH}/lstm_fp32_model ${QUANT2_LSTM_MODEL_DIR}/lstm_quant ${QUANT2_INT8_LSTM_SAVE_PATH}/quant_lstm_input_data)
 endif()
diff --git a/python/paddle/fluid/contrib/slim/tests/quant2_int8_lstm_model.py b/python/paddle/fluid/contrib/slim/tests/quant2_int8_lstm_model.py
index 0e33bd8ba1a4e085fc46ff132a20c1a4a06360bf..4f4a2ddd4ab417096776f5c1da70b3e1860160ee 100644
--- a/python/paddle/fluid/contrib/slim/tests/quant2_int8_lstm_model.py
+++ b/python/paddle/fluid/contrib/slim/tests/quant2_int8_lstm_model.py
@@ -20,30 +20,28 @@ import time
 import unittest
 from paddle import fluid
 from paddle.fluid.core import AnalysisConfig, create_paddle_predictor
+from save_quant_model import transform_and_save_int8_model
 
 
 def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument(
         '--fp32_model', type=str, default='', help='A path to a FP32 model.')
-    parser.add_argument('--infer_data', type=str, default='', help='Data file.')
     parser.add_argument(
-        '--num_threads', type=int, default=1, help='Number of threads.')
+        '--quant_model', type=str, default='', help='A path to a quant model.')
+    parser.add_argument('--infer_data', type=str, default='', help='Data file.')
     parser.add_argument(
         '--warmup_iter',
         type=int,
         default=1,
         help='Number of the first iterations to skip in performance statistics.')
-    parser.add_argument(
-        '--warmup_batch_size',
-        type=int,
-        default=1,
-        help='Number of batches to use in PTQ warmup. Default: 1.')
     parser.add_argument(
         '--acc_diff_threshold',
         type=float,
         default=0.01,
         help='Accepted accuracy difference threshold.')
+    parser.add_argument(
+        '--num_threads', type=int, default=1, help='Number of threads.')
     parser.add_argument(
         '--mkldnn_cache_capacity',
         type=int,
@@ -56,7 +54,7 @@ def parse_args():
 
 
 class TestLstmModelPTQ(unittest.TestCase):
-    def get_warmup_tensor(self, data_path, place, warmup_batch_size):
+    def get_warmup_tensor(self, data_path, place):
         data = []
         with open(data_path, 'rb') as in_f:
             while True:
@@ -87,30 +85,31 @@ class TestLstmModelPTQ(unittest.TestCase):
             infer_label.shape = label.shape
             infer_label.dtype = fluid.core.PaddleDType.INT32
             data.append([infer_data, infer_label])
-        warmup_data = data[:warmup_batch_size]
-        inputs = data[warmup_batch_size:]
+        warmup_data = data[:1]
+        inputs = data[1:]
         return warmup_data, inputs
 
     def set_config(self,
                    model_path,
                    num_threads,
                    mkldnn_cache_capacity,
-                   warmup_batch_size,
                    warmup_data=None,
-                   enable_int8=False):
+                   use_analysis=False,
+                   enable_ptq=False):
         config = AnalysisConfig(model_path)
-        config.disable_gpu()
-        config.switch_use_feed_fetch_ops(True)
-        config.switch_ir_optim(True)
         config.set_cpu_math_library_num_threads(num_threads)
-        # This pass to work properly, must be added before fc_fuse_pass
-        config.pass_builder().insert_pass(5, "fc_lstm_fuse_pass")
-        config.enable_mkldnn()
-        config.set_mkldnn_cache_capacity(mkldnn_cache_capacity)
-        if enable_int8:
-            config.enable_quantizer()
-            config.quantizer_config().set_quant_data(warmup_data)
-            config.quantizer_config().set_quant_batch_size(warmup_batch_size)
+        if use_analysis:
+            config.disable_gpu()
+            config.switch_use_feed_fetch_ops(True)
+            config.switch_ir_optim(True)
+            config.enable_mkldnn()
+            config.set_mkldnn_cache_capacity(mkldnn_cache_capacity)
+            if enable_ptq:
+                # For this pass to work properly, it must be added before fc_fuse_pass
+                config.pass_builder().insert_pass(5, "fc_lstm_fuse_pass")
+                config.enable_quantizer()
+                config.quantizer_config().set_quant_data(warmup_data)
+                config.quantizer_config().set_quant_batch_size(1)
         return config
 
     def run_program(self,
@@ -119,15 +118,13 @@ class TestLstmModelPTQ(unittest.TestCase):
                     num_threads,
                     mkldnn_cache_capacity,
                     warmup_iter,
-                    warmup_batch_size,
-                    enable_ptq_int8=False):
+                    use_analysis=False,
+                    enable_ptq=False):
         place = fluid.CPUPlace()
-        warmup_data, inputs = self.get_warmup_tensor(data_path, place,
-                                                     warmup_batch_size)
+        warmup_data, inputs = self.get_warmup_tensor(data_path, place)
         warmup_data = [item[0] for item in warmup_data]
         config = self.set_config(model_path, num_threads, mkldnn_cache_capacity,
-                                 warmup_batch_size, warmup_data,
-                                 enable_ptq_int8)
+                                 warmup_data, use_analysis, enable_ptq)
         predictor = create_paddle_predictor(config)
 
         data = [item[0] for item in inputs]
@@ -183,34 +180,47 @@ class TestLstmModelPTQ(unittest.TestCase):
         fp32_model = test_case_args.fp32_model
         assert fp32_model, 'The FP32 model path cannot be empty. Please, use the --fp32_model option.'
+        quant_model = test_case_args.quant_model
+        assert quant_model, 'The quant model path cannot be empty. Please, use the --quant_model option.'
         infer_data = test_case_args.infer_data
         assert infer_data, 'The dataset path cannot be empty. Please, use the --infer_data option.'
         num_threads = test_case_args.num_threads
         mkldnn_cache_capacity = test_case_args.mkldnn_cache_capacity
         warmup_iter = test_case_args.warmup_iter
-        warmup_batch_size = test_case_args.warmup_batch_size
         acc_diff_threshold = test_case_args.acc_diff_threshold
 
         (fp32_hx_acc, fp32_ctc_acc, fp32_fps) = self.run_program(
             fp32_model, infer_data, num_threads, mkldnn_cache_capacity,
-            warmup_iter, warmup_batch_size, False)
+            warmup_iter, False, False)
 
         (int8_hx_acc, int8_ctc_acc, int8_fps) = self.run_program(
             fp32_model, infer_data, num_threads, mkldnn_cache_capacity,
-            warmup_iter, warmup_batch_size, True)
+            warmup_iter, True, True)
+
+        quant_model_save_path = quant_model + "_int8"
+        # transform the Quant model into a Quant2 INT8 model
+        transform_and_save_int8_model(quant_model, quant_model_save_path,
+                                      "fusion_lstm,concat")
 
-        print("FP32: fps {0}, hx_acc {1}, ctc_acc {2}.".format(
+        (quant_hx_acc, quant_ctc_acc, quant_fps) = self.run_program(
+            quant_model_save_path, infer_data, num_threads,
+            mkldnn_cache_capacity, warmup_iter, True, False)
+
+        print("FP32: fps {0}, hx_acc {1}, ctc_acc {2}".format(
             fp32_fps, fp32_hx_acc, fp32_ctc_acc))
 
-        print("PTQ INT8: fps {0}, hx_acc {1}, ctc_acc {2}.".format(
+        print("PTQ_INT8: fps {0}, hx_acc {1}, ctc_acc {2}".format(
             int8_fps, int8_hx_acc, int8_ctc_acc))
 
+        print("QUANT2_INT8: fps {0}, hx_acc {1}, ctc_acc {2}".format(
+            quant_fps, quant_hx_acc, quant_ctc_acc))
+
         sys.stdout.flush()
 
-        hx_delta_value = fp32_hx_acc - int8_hx_acc
-        ctc_delta_value = fp32_ctc_acc - int8_ctc_acc
-        self.assertLess(hx_delta_value, acc_diff_threshold)
-        self.assertLess(ctc_delta_value, acc_diff_threshold)
+        self.assertLess(fp32_hx_acc - int8_hx_acc, acc_diff_threshold)
+        self.assertLess(fp32_ctc_acc - int8_ctc_acc, acc_diff_threshold)
+        self.assertLess(fp32_hx_acc - quant_hx_acc, acc_diff_threshold)
+        self.assertLess(fp32_ctc_acc - quant_ctc_acc, acc_diff_threshold)
 
 
 if __name__ == "__main__":
diff --git a/python/paddle/fluid/contrib/slim/tests/save_quant_model.py b/python/paddle/fluid/contrib/slim/tests/save_quant_model.py
index e38148250af2177801995d263dc6d3c9502bc501..3fadf25150f9ef3556a343fdce8acc24d788f5dc 100644
--- a/python/paddle/fluid/contrib/slim/tests/save_quant_model.py
+++ b/python/paddle/fluid/contrib/slim/tests/save_quant_model.py
@@ -16,11 +16,6 @@ import unittest
 import os
 import sys
 import argparse
-import logging
-import struct
-import six
-import numpy as np
-import time
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid.framework import IrGraph
@@ -62,7 +57,11 @@ def parse_args():
     return test_args, sys.argv[:1] + args
 
 
-def transform_and_save_int8_model(original_path, save_path):
+def transform_and_save_int8_model(original_path,
+                                  save_path,
+                                  ops_to_quantize='',
+                                  op_ids_to_skip='',
+                                  debug=False):
     place = fluid.CPUPlace()
     exe = fluid.Executor(place)
     inference_scope = fluid.executor.global_scope()
@@ -75,24 +74,26 @@ def transform_and_save_int8_model(original_path,
      fetch_targets] = fluid.io.load_inference_model(original_path, exe, 'model',
                                                     'params')
 
-    ops_to_quantize = set()
-    if len(test_args.ops_to_quantize) > 0:
-        ops_to_quantize = set(test_args.ops_to_quantize.split(','))
+    ops_to_quantize_set = set()
+    print(ops_to_quantize)
+    if len(ops_to_quantize) > 0:
+        ops_to_quantize_set = set(ops_to_quantize.split(','))
 
-    op_ids_to_skip = set([-1])
-    if len(test_args.op_ids_to_skip) > 0:
-        op_ids_to_skip = set(map(int, test_args.op_ids_to_skip.split(',')))
+    op_ids_to_skip_set = set([-1])
+    print(op_ids_to_skip)
+    if len(op_ids_to_skip) > 0:
+        op_ids_to_skip_set = set(map(int, op_ids_to_skip.split(',')))
 
     graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
-    if (test_args.debug):
+    if (debug):
         graph.draw('.', 'quant_orig', graph.all_op_nodes())
     transform_to_mkldnn_int8_pass = Quant2Int8MkldnnPass(
-        ops_to_quantize,
-        _op_ids_to_skip=op_ids_to_skip,
+        ops_to_quantize_set,
+        _op_ids_to_skip=op_ids_to_skip_set,
         _scope=inference_scope,
         _place=place,
         _core=core,
-        _debug=test_args.debug)
+        _debug=debug)
     graph = transform_to_mkldnn_int8_pass.apply(graph)
     inference_program = graph.to_program()
     with fluid.scope_guard(inference_scope):
@@ -106,5 +107,6 @@ def transform_and_save_int8_model(original_path,
 if __name__ == '__main__':
     global test_args
     test_args, remaining_args = parse_args()
-    transform_and_save_int8_model(test_args.quant_model_path,
-                                  test_args.int8_model_save_path)
+    transform_and_save_int8_model(
+        test_args.quant_model_path, test_args.int8_model_save_path,
+        test_args.ops_to_quantize, test_args.op_ids_to_skip, test_args.debug)
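After this change, transform_and_save_int8_model can be imported and called directly rather than only through the script's command line. A minimal sketch of such a call, mirroring the way quant2_int8_lstm_model.py above uses it; the model paths are placeholders, not files shipped with the test data:

# Usage sketch (not part of the patch): drive the refactored helper from Python.
from save_quant_model import transform_and_save_int8_model

quant_model_path = "/path/to/lstm_quant"       # a trained Quant model (placeholder path)
int8_save_path = quant_model_path + "_int8"    # where the Quant2 INT8 model is written

# ops_to_quantize takes a comma-separated list of operator types;
# op_ids_to_skip ('') and debug (False) keep their defaults here.
transform_and_save_int8_model(quant_model_path, int8_save_path,
                              ops_to_quantize="fusion_lstm,concat")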
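For reference, a condensed sketch of the predictor setup exercised by the Quant2 INT8 path of the test (use_analysis=True, enable_ptq=False). It only repeats AnalysisConfig calls that appear in the patch; the model directory and the thread/cache values are illustrative:

from paddle.fluid.core import AnalysisConfig, create_paddle_predictor

# Output directory of transform_and_save_int8_model (placeholder path).
model_path = "/path/to/lstm_quant_int8"

config = AnalysisConfig(model_path)
config.set_cpu_math_library_num_threads(1)
config.disable_gpu()
config.switch_use_feed_fetch_ops(True)
config.switch_ir_optim(True)
config.enable_mkldnn()
config.set_mkldnn_cache_capacity(100)

predictor = create_paddle_predictor(config)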