PaddlePaddle supports offline INT8 calibration to accelerate inference. In this document, we provide instructions on how to enable INT8 calibration and show the throughput results for ResNet-50 and MobileNet-V1.
## 0. Prerequisite
You need PaddlePaddle 1.3 or later; install the Python package with `pip install paddlepaddle==1.3`.
## 1. How to generate INT8 model
You can refer to the unit test in [test_calibration_resnet50.py](../tests/test_calibration_resnet50.py). Basically, there are three steps (a combined sketch of the full flow follows the list):
* Construct the calibration object.
```python
import paddle.fluid.contrib.int8_inference.utility as int8_utility

calibrator = int8_utility.Calibrator(  # Step 1
    program=infer_program,  # required, FP32 program
    pretrained_model=model_path,  # required, FP32 pretrained model
    algo=algo)  # required, calibration algorithm; default is max, the alternative is KL (Kullback-Leibler divergence)
```
* Call `calibrator.sample_data()` after each executor run.
```python
_, acc1, _ = exe.run(
    program,
    feed={feed_dict[0]: image,
          feed_dict[1]: label},
    fetch_list=fetch_targets)
calibrator.sample_data()  # Step 2
```
* Call `calibrator.save_int8_model()` after sampling over the specified number of iterations (e.g., iterations = 50).
```python
calibrator.save_int8_model()  # Step 3
```
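Putting the three steps together, the following is a minimal sketch of the whole calibration flow; names such as `val_reader`, the feed layout, and the 50-iteration cutoff are illustrative assumptions modeled on the snippets above, not part of the calibration API:
```python
import paddle.fluid.contrib.int8_inference.utility as int8_utility

# Assumed to come from your own FP32 inference setup:
# infer_program, model_path, feed_dict, fetch_targets, exe, val_reader.
calibrator = int8_utility.Calibrator(  # Step 1: construct the calibrator
    program=infer_program,
    pretrained_model=model_path,
    algo='KL')

iterations = 50  # number of calibration batches to sample
for batch_id, (image, label) in enumerate(val_reader()):
    _, acc1, _ = exe.run(infer_program,
                         feed={feed_dict[0]: image,
                               feed_dict[1]: label},
                         fetch_list=fetch_targets)
    calibrator.sample_data()  # Step 2: record tensor statistics per batch
    if batch_id + 1 == iterations:
        break

calibrator.save_int8_model()  # Step 3: write out the INT8 model
```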
## 2. How to run INT8 model
You can load the INT8 model with the load_inference_model [API](https://github.com/PaddlePaddle/Paddle/blob/8b50ad80ff6934512d3959947ac1e71ea3fb9ea3/python/paddle/fluid/io.py#L991) and run INT8 inference in the same way as [FP32](https://github.com/PaddlePaddle/models/blob/develop/fluid/PaddleCV/object_detection/eval.py "FP32").
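As a minimal sketch of that flow (the output directory `int8_model_dir` and the dummy input batch are assumptions for illustration):
```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()  # INT8 inference runs on CPU
exe = fluid.Executor(place)

# Hypothetical path: the directory written by calibrator.save_int8_model().
int8_model_dir = 'int8_model'

# load_inference_model returns the program plus its feed/fetch targets.
[inference_program, feed_target_names, fetch_targets] = \
    fluid.io.load_inference_model(dirname=int8_model_dir, executor=exe)

# A dummy NCHW batch stands in for real preprocessed images.
image = np.random.rand(1, 3, 224, 224).astype('float32')

results = exe.run(inference_program,
                  feed={feed_target_names[0]: image},
                  fetch_list=fetch_targets)
```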
The throughput results are shown below:

| Model | Dataset | FP32 Throughput | INT8 Throughput | Ratio (INT8/FP32) |
| ------------ | ----------------- | --------------- | ---------------- | ----------------- |
| ResNet-50 | Full ImageNet Val | 11.54 images/s | 32.2 images/s | 2.79 |
| MobileNet-V1 | Full ImageNet Val | 49.21 images/s | 108.37 images/s | 2.2 |
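The ratio column is simply INT8 throughput divided by FP32 throughput, e.g., 32.2 / 11.54 ≈ 2.79 for ResNet-50.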
Please note that the [full ImageNet validation dataset](http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar "full ImageNet validation dataset") can be downloaded by running the script `test_calibration.py` with `DATASET=full`.
Notes:
* The accuracy measurement requires a model that takes `label` as an input.
* The theoretical INT8 speedup is 4X on Intel® Xeon® Cascade Lake servers (see "The theoretical peak compute gains are 4x int8 OPS over fp32 OPS." in the [Reference](https://software.intel.com/en-us/articles/lower-numerical-precision-deep-learning-inference-and-training "Reference")). Therefore, the op-level gain is 4X, while the topology-level gain is smaller.