run_float16_demo.sh 3.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
#!/bin/bash
#
# Builds PaddlePaddle Fluid with GPU support, installs the resulting wheel,
# and then runs the float16 inference accuracy test and benchmarks.

BUILD_PATH=/paddle/fp16_build
WHEEL_PATH=$BUILD_PATH/python/dist
INFER_PATH=$BUILD_PATH/paddle/fluid/inference/tests/book
DEMO_PATH=/paddle/contrib/float16

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Run on a single GPU (device 0). Point this at the most powerful CUDA GPU
# on your machine.
export CUDA_VISIBLE_DEVICES=0

# Build the PaddlePaddle Fluid wheel package and install it.
# Every step is checked: continuing after a failed build would either run
# cmake/make in the wrong directory or install and benchmark a stale wheel.
mkdir -p "$BUILD_PATH" || die "cannot create $BUILD_PATH"
cd "$BUILD_PATH" || die "cannot cd to $BUILD_PATH"
cmake .. -DWITH_AVX=OFF \
         -DWITH_MKL=OFF \
         -DWITH_GPU=ON \
         -DWITH_TESTING=ON \
         -DWITH_TIMER=ON \
         -DWITH_PROFILER=ON \
         -DWITH_FLUID_ONLY=ON \
  || die "cmake configuration failed"
make -j "$(nproc)" || die "build failed"

# Pick the most recently modified wheel with a glob instead of parsing `ls`,
# so that leftover wheels from earlier builds do not corrupt the path.
wheel=
for candidate in "$WHEEL_PATH"/*.whl; do
  # An unmatched glob stays literal; skip it.
  [[ -e "$candidate" ]] || continue
  if [[ -z "$wheel" || "$candidate" -nt "$wheel" ]]; then
    wheel=$candidate
  fi
done
[[ -n "$wheel" ]] || die "no wheel found in $WHEEL_PATH"
pip install -U "$wheel" || die "pip install of $wheel failed"

# Everything below runs from the demo directory. Abort if it is missing so
# that the log cleanup cannot delete *.log files from some other directory.
cd "$DEMO_PATH" || { printf 'error: cannot cd to %s\n' "$DEMO_PATH" >&2; exit 1; }

# Clear previous log results
rm -f -- ./*.log

# Test the float16 inference accuracy of resnet32 on cifar10 data set.
# stdbuf -oL line-buffers stdout so the output reaches tee (and the log)
# as it is produced.
stdbuf -oL python float16_inference_demo.py \
       --data_set=cifar10 \
       --model=resnet \
       --threshold=0.6 \
       --repeat=10 \
       2>&1 | tee -a float16_inference_accuracy.log

# Sleep to cool down the GPU for consistent benchmarking
sleep 2m

# benchmarking parameters
REPEAT=1000
MAXIMUM_BATCH_SIZE=512

#######################################
# Benchmark one model on one data set at one batch size.
#
# First runs float16_inference_demo.py for the model/data set, then runs the
# matching C++ inference test binary and appends its output to a log file,
# and finally sleeps so the GPU can cool down before the next measurement.
# NOTE(review): the Python step presumably produces the saved models under
# $DEMO_PATH that the binary loads via --dirname/--fp16_dirname; confirm.
#
# Globals:   INFER_PATH, DEMO_PATH, REPEAT (read)
# Arguments: $1 - data set name (imagenet | cifar10)
#            $2 - model name (vgg | resnet); also selects the test binary
#            $3 - batch size
#            $4 - log file to append the benchmark output to
#            $5 - cool-down duration passed to sleep (e.g. 2m)
#######################################
run_benchmark() {
  local data_set=$1
  local model=$2
  local batch_size=$3
  local log_file=$4
  local cooldown=$5

  stdbuf -oL python float16_inference_demo.py \
         --data_set="$data_set" \
         --model="$model" \
         --threshold=0.001 \
         --repeat=1

  # The binary is chosen from the model name, so resnet models are run with
  # the resnet binary (the cifar10 resnet32 stanza previously invoked the
  # vgg binary, unlike the imagenet resnet50 stanza).
  "$INFER_PATH/test_inference_image_classification_${model}" \
      --data_set="$data_set" \
      --dirname="$DEMO_PATH/image_classification_${data_set}_${model}.inference.model" \
      --fp16_dirname="$DEMO_PATH/float16_image_classification_${data_set}_${model}.inference.model" \
      --repeat="$REPEAT" \
      --batch_size="$batch_size" \
      --skip_cpu=true \
      2>&1 | tee -a "$log_file"

  sleep "$cooldown"
}

# Sweep batch sizes 1, 2, 4, ... up to MAXIMUM_BATCH_SIZE.
for ((batch_size = 1; batch_size <= MAXIMUM_BATCH_SIZE; batch_size *= 2)); do
  # Test inference benchmark of vgg16 on imagenet
  run_benchmark imagenet vgg "$batch_size" imagenet_vgg16_benchmark.log 2m

  # Test inference benchmark of resnet50 on imagenet
  run_benchmark imagenet resnet "$batch_size" imagenet_resnet50_benchmark.log 2m

  # Test inference benchmark of vgg16 on cifar10
  run_benchmark cifar10 vgg "$batch_size" cifar10_vgg16_benchmark.log 1m

  # Test inference benchmark of resnet32 on cifar10
  run_benchmark cifar10 resnet "$batch_size" cifar10_resnet32_benchmark.log 1m
done