diff --git a/python/paddle/fluid/contrib/int8_inference/README.md b/python/paddle/fluid/contrib/int8_inference/README.md index 3228610f968c9bec86d6bf781585038ffd095bce..7dc7c8d2a374a1d589ccb072b5bf6cce1f6ddda7 100644 --- a/python/paddle/fluid/contrib/int8_inference/README.md +++ b/python/paddle/fluid/contrib/int8_inference/README.md @@ -6,7 +6,7 @@ PaddlePaddle supports offline INT8 calibration to accelerate the inference speed You need to install at least PaddlePaddle-1.3 python package `pip install paddlepaddle==1.3`. ## 1. How to generate INT8 model -You can refer to the unit test in [test_calibration.py](../tests/test_calibration.py). Basically, there are three steps: +You can refer to the unit test in [test_calibration_resnet50.py](../tests/test_calibration_resnet50.py). Basically, there are three steps: * Construct calibration object. ```python @@ -68,18 +68,19 @@ Notes: * The INT8 theoretical speedup is 4X on Intel® Xeon® Cascadelake Server (please refer to `The theoretical peak compute gains are 4x int8 OPS over fp32 OPS.` in [Reference](https://software.intel.com/en-us/articles/lower-numerical-precision-deep-learning-inference-and-training "Reference")). Therefore, op-level gain is 4X and topology-level is smaller. ## 4. How to reproduce the results -* Small dataset (Single core) +* Small dataset for ResNet-50 (Single core) ```bash -FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/tests/test_calibration.py +FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py ``` +>Note: Change `test_calibration_resnet50.py` to `test_calibration_mobilenetv1.py` for MobileNet-V1. Same for the following commands. -* Full dataset (Single core) +* Full dataset for ResNet-50 (Single core) ```bash -FLAGS_use_mkldnn=true DATASET=full python python/paddle/fluid/contrib/tests/test_calibration.py +FLAGS_use_mkldnn=true DATASET=full python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py ``` -* Full dataset (Multi-core) +* Full dataset for ResNet-50 (Multi-core) ```bash -FLAGS_use_mkldnn=true OMP_NUM_THREADS=20 DATASET=full python python/paddle/fluid/contrib/tests/test_calibration.py +FLAGS_use_mkldnn=true OMP_NUM_THREADS=20 DATASET=full python python/paddle/fluid/contrib/tests/test_calibration_resnet50.py ``` > Notes: This is an example command with 20 cores by using set `OMP_NUM_THREADS` value. diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt index e61e93da3f0321a5c627912d540cda28400ea790..c59df49f6260c25ebdb0290b30d3bdb78b98a3c6 100644 --- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt +++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt @@ -42,10 +42,10 @@ if(LINUX AND WITH_MKLDNN) inference_analysis_python_api_int8_test(test_slim_int8_googlenet ${INT8_GOOGLENET_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE}) # mobilenet int8 - set(INT8_MOBILENET_MODEL_DIR "${INT8_DATA_DIR}/mobilenet") + set(INT8_MOBILENET_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv1") inference_analysis_python_api_int8_test(test_slim_int8_mobilenet ${INT8_MOBILENET_MODEL_DIR} ${INT8_DATA_DIR} ${MKLDNN_INT8_TEST_FILE}) - # temporarily adding WITH_SLIM_MKLDNN_FULL_TEST FLAG for QA testing the following UTs locally, + # temporarily adding WITH_SLIM_MKLDNN_FULL_TEST FLAG for QA testing the following UTs locally, # since the following UTs cost too much time on CI test. if (WITH_SLIM_MKLDNN_FULL_TEST) # resnet50 int8 @@ -70,7 +70,7 @@ if(LINUX AND WITH_MKLDNN) endif() endif() -# Since test_mkldnn_int8_quantization_strategy only supports testing on Linux +# Since test_mkldnn_int8_quantization_strategy only supports testing on Linux # with MKL-DNN, we remove it here for not repeating test, or not testing on other systems. list(REMOVE_ITEM TEST_OPS test_mkldnn_int8_quantization_strategy)