How to run ResNet-50 INT8 with CAPI
Created by: chuanqi129
1. Build Paddle
Requirements: cmake >= 3.0, python protobuf >= 3.0, patchelf
git clone https://github.com/PaddlePaddle/Paddle
mkdir build
cd build
cmake .. -DCMAKE_INSTALL_PREFIX=./tmp -DWITH_GPU=OFF -DWITH_MKLDNN=ON -DWITH_TESTING=ON -DWITH_PROFILER=ON -DWITH_MKL=ON -DWITH_INFERENCE_API_TEST=ON -DCMAKE_BUILD_TYPE=Release -DON_INFER=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
make -j
make install -j
make -j fluid_lib_dist
pip install --force-reinstall --user ./tmp/opt/paddle/share/wheels/paddlepaddle-*.whl
or
pip install paddlepaddle==1.3 --user
2. Run test_calibration.py to get the resnet50 FP32 and INT8 models
cd $Paddle_home
FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/tests/test_calibration.py
Then you will get the resnet50 FP32 pretrained model under ~/.cache/paddle/dataset/int8/download/resnet50_fp32/model/ and the resnet50 INT8 model under $Paddle_home/calibration_out/
3. Prune and save model
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import argparse
def parse_args():
    """Parse command-line arguments for the prune-and-save script.

    Returns:
        argparse.Namespace with:
            input (str): directory of the raw inference model to prune.
            output (str): directory where the pruned model is saved.
            with_transpiler (bool): whether to run InferenceTranspiler
                before pruning (default True).
    """

    def str2bool(v):
        # argparse's `type=bool` is broken for flags: bool('False') is True
        # because every non-empty string is truthy, so the option could
        # never be disabled from the command line. Parse spellings
        # explicitly instead.
        return str(v).lower() in ('true', 't', 'yes', 'y', '1')

    parser = argparse.ArgumentParser(description='Prune and save model')
    parser.add_argument(
        '--input',
        dest='input',
        help='raw model for pruning and save',
        default='',
        type=str)
    parser.add_argument(
        '--output',
        dest='output',
        help='pruning and save result model',
        default='',
        type=str)
    parser.add_argument(
        '--with_transpiler',
        dest='with_transpiler',
        help='with transpiler',
        default=True,
        type=str2bool)
    args = parser.parse_args()
    return args
def prune(args):
    """Load an inference model, cut it at the softmax/cross_entropy op,
    and save the pruned model with "model"/"params" file names.

    Args:
        args: Namespace with `input` (source model dir), `output`
            (destination dir) and `with_transpiler` (bool, apply
            InferenceTranspiler before pruning).

    Raises:
        ValueError: if the program contains no softmax/cross_entropy op.
    """
    model = args.input
    pruned_dir = args.output
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    [test_program, feed_dict,
     fetch_targets] = fluid.io.load_inference_model(model, exe)
    if args.with_transpiler:
        t = fluid.transpiler.InferenceTranspiler()
        t.transpile(test_program, place)
    prune_index = -1
    prune_fetch_list = []
    for op_index, op in enumerate(test_program.current_block().ops):
        if op.type == "softmax" or op.type == "cross_entropy":
            # The op's 'X' input is the tensor we want to fetch after
            # the op itself is removed.
            for input_name in op.input_names:
                if input_name != 'X':
                    continue
                prune_fetch_list.append(op.input(input_name)[0])
            prune_index = op_index
            break
    if prune_index < 0:
        # Without this guard the removal loop below would delete every op
        # and prune_fetch_list would stay empty, silently saving a broken
        # (empty) model.
        raise ValueError(
            "no softmax/cross_entropy op found in the model; "
            "nothing to prune")
    # Remove the matched op and everything after it; iterate in reverse so
    # the remaining indices stay valid while ops are deleted.
    for index in range(len(test_program.current_block().ops) - 1,
                       prune_index - 1, -1):
        test_program.current_block()._remove_op(index)
    # The training-only 'label' input is not needed for pure inference;
    # drop it only if the model actually declares it.
    if 'label' in feed_dict:
        feed_dict.remove('label')
    fetch_targets = [test_program.current_block().var(i)
                     for i in prune_fetch_list]
    fluid.io.save_inference_model(pruned_dir, feed_dict, fetch_targets,
                                  exe, test_program,
                                  model_filename="model",
                                  params_filename="params")
if __name__ == '__main__':
    # Parse CLI options, echo them for the run log, then execute pruning.
    cli_args = parse_args()
    print(cli_args)
    prune(cli_args)
# save weights as "model" and "params" files
FLAGS_use_mkldnn=true python prune.py --input ~/.cache/paddle/dataset/int8/download/resnet50_fp32/model/ --output resnet50_fp32
FLAGS_use_mkldnn=true python prune.py --input calibration_out/ --output resnet50_int8
4. Run the CAPI test application
cd $Paddle_home/build
# run capi test application
./paddle/fluid/inference/tests/api/test_analyzer_resnet50 --infer_model=../resnet50_fp32/ --repeat=100 --gtest_filter=Analyzer_resnet50.profile_mkldnn --batch_size=1 --num_threads=1
./paddle/fluid/inference/tests/api/test_analyzer_resnet50 --infer_model=../resnet50_int8/ --repeat=100 --gtest_filter=Analyzer_resnet50.profile_mkldnn --batch_size=1 --num_threads=1
# you can change 'infer_model' to the image classification model you saved.
5. Performance result on SKX-6148
ResNet50 result
Threads Num. | 1 | 2 | 4 | |||
---|---|---|---|---|---|---|
Batch size | 1 | 2 | 4 | 1 | 1 | |
Avg. FPS / per thread | FP32 | 17.92 | 9.23 * 2 | 4.89 * 4 | 17.02 | 16.90 |
INT8 | 28.47 | 14.93 * 2 | 7.67 * 4 | 28.23 | 27.21 | |
INT8/FP32 | 1.59 | 1.62 | 1.57 | 1.66 | 1.61 |
Mobilenet v1 result
Threads Num. | 1 | 2 | 4 | |||
---|---|---|---|---|---|---|
Batch size | 1 | 2 | 4 | 1 | 1 | |
Avg. FPS / per thread | FP32 | 91.2 | 45.46 * 2 | 24.33 * 4 | 93.13 | 87.72 |
INT8 | 155.44 | 81.15 * 2 | 41.76 * 4 | 153.73 | 152.77 | |
INT8/FP32 | 1.70 | 1.79 | 1.72 | 1.65 | 1.74 |
Please review on it. @luotao1 @hshen14