#!/bin/bash
# Driver that reads a line-oriented configuration file and runs the
# train -> eval -> export -> inference chain described by it.
#
# Usage: bash test_train_inference_python.sh <config_file> <mode>
# Must be started from the directory that contains test_tipc/, because
# common_func.sh is sourced through a relative path.
FILENAME=$1
source test_tipc/common_func.sh

# MODE is one of: 'lite_train_lite_infer', 'lite_train_whole_infer',
# 'whole_train_whole_infer', 'whole_infer', 'klquant_whole_infer',
# or 'benchmark_train' (which forces a single epoch, see below).
MODE=$2

dataline=$(cat "${FILENAME}")

# Parse params.
# The file content is split on newlines by word splitting, so EMPTY lines are
# dropped and the indices below count non-empty lines only (index 0 and the
# separator lines in between are intentionally skipped). Do not replace this
# with mapfile/readarray: that would keep empty lines and shift every index.
# IFS stays set to newline for the rest of the top-level code.
IFS=$'\n'
lines=(${dataline})

# The training params
# func_parser_key / func_parser_value / func_parser_params come from
# common_func.sh (not shown here); presumably they return the key part, the
# value part and the mode-specific value of one config line respectively.
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
gpu_list=$(func_parser_value "${lines[3]}")
train_use_gpu_key=$(func_parser_key "${lines[4]}")
train_use_gpu_value=$(func_parser_value "${lines[4]}")
autocast_list=$(func_parser_value "${lines[5]}")
autocast_key=$(func_parser_key "${lines[5]}")
epoch_key=$(func_parser_key "${lines[6]}")
epoch_num=$(func_parser_params "${lines[6]}")
save_model_key=$(func_parser_key "${lines[7]}")
train_batch_key=$(func_parser_key "${lines[8]}")
train_batch_value=$(func_parser_value "${lines[8]}")
pretrain_model_key=$(func_parser_key "${lines[9]}")
pretrain_model_value=$(func_parser_value "${lines[9]}")
train_model_name=$(func_parser_value "${lines[10]}")
train_infer_img_dir=$(func_parser_value "${lines[11]}")
train_param_key1=$(func_parser_key "${lines[12]}")
train_param_value1=$(func_parser_value "${lines[12]}")

trainer_list=$(func_parser_value "${lines[14]}")

trainer_norm=$(func_parser_key "${lines[15]}")
norm_trainer=$(func_parser_value "${lines[15]}")
pact_key=$(func_parser_key "${lines[16]}")
pact_trainer=$(func_parser_value "${lines[16]}")
fpgm_key=$(func_parser_key "${lines[17]}")
fpgm_trainer=$(func_parser_value "${lines[17]}")
distill_key=$(func_parser_key "${lines[18]}")
distill_trainer=$(func_parser_value "${lines[18]}")
to_static_key=$(func_parser_key "${lines[19]}")
to_static_trainer=$(func_parser_value "${lines[19]}")
trainer_key2=$(func_parser_key "${lines[20]}")
trainer_value2=$(func_parser_value "${lines[20]}")

# The eval params
eval_py=$(func_parser_value "${lines[23]}")
eval_key1=$(func_parser_key "${lines[24]}")
eval_value1=$(func_parser_value "${lines[24]}")

# The export params
save_infer_key=$(func_parser_key "${lines[27]}")
export_weight=$(func_parser_key "${lines[28]}")
norm_export=$(func_parser_value "${lines[29]}")
pact_export=$(func_parser_value "${lines[30]}")
fpgm_export=$(func_parser_value "${lines[31]}")
distill_export=$(func_parser_value "${lines[32]}")
kl_quant_cmd_key=$(func_parser_key "${lines[33]}")
kl_quant_cmd_value=$(func_parser_value "${lines[33]}")
export_key2=$(func_parser_key "${lines[34]}")
export_value2=$(func_parser_value "${lines[34]}")

# parser inference model
infer_model_dir_list=$(func_parser_value "${lines[36]}")
infer_export_flag=$(func_parser_value "${lines[37]}")
infer_is_quant=$(func_parser_value "${lines[38]}")

# parser inference
inference_py=$(func_parser_value "${lines[39]}")
use_gpu_key=$(func_parser_key "${lines[40]}")
use_gpu_list=$(func_parser_value "${lines[40]}")
use_mkldnn_key=$(func_parser_key "${lines[41]}")
use_mkldnn_list=$(func_parser_value "${lines[41]}")
cpu_threads_key=$(func_parser_key "${lines[42]}")
cpu_threads_list=$(func_parser_value "${lines[42]}")
batch_size_key=$(func_parser_key "${lines[43]}")
batch_size_list=$(func_parser_value "${lines[43]}")
use_trt_key=$(func_parser_key "${lines[44]}")
use_trt_list=$(func_parser_value "${lines[44]}")
precision_key=$(func_parser_key "${lines[45]}")
precision_list=$(func_parser_value "${lines[45]}")
infer_model_key=$(func_parser_key "${lines[46]}")
image_dir_key=$(func_parser_key "${lines[47]}")
infer_img_dir=$(func_parser_value "${lines[47]}")
save_log_key=$(func_parser_key "${lines[48]}")
benchmark_key=$(func_parser_key "${lines[49]}")
benchmark_value=$(func_parser_value "${lines[49]}")
infer_key1=$(func_parser_key "${lines[50]}")
infer_value1=$(func_parser_value "${lines[50]}")

# Fall back to 2 epochs when the config gives no epoch count for this mode.
if [ ! $epoch_num ]; then
    epoch_num=2
fi
# Benchmark training always runs exactly one epoch.
if [[ $MODE = 'benchmark_train' ]]; then
    epoch_num=1
fi

# All logs of this run go below <cwd>/test_tipc/output/<model_name>/<MODE>.
CLS_ROOT_PATH=$(pwd)
LOG_PATH="${CLS_ROOT_PATH}/test_tipc/output/${model_name}/${MODE}"
mkdir -p "${LOG_PATH}"
# Path handed to status_check as the per-run status log.
status_log="${LOG_PATH}/results_python.log"

#######################################
# Run the inference script once for every configured option combination and
# report the status of each run through status_check.
#
# For each entry of use_gpu_list:
#   "False" / "cpu": iterate mkldnn x cpu threads x batch size x precision.
#   "True"  / "gpu": iterate TensorRT x precision x batch size, skipping the
#                    combination precision="True" with use_trt="False".
#   anything else:   print a notice and run nothing.
# Each run redirects stdout+stderr into its own log file below $4, the log is
# then echoed to stdout.
#
# Globals (read):
#   use_gpu_key use_gpu_list use_mkldnn_key use_mkldnn_list cpu_threads_key
#   cpu_threads_list batch_size_key batch_size_list use_trt_key use_trt_list
#   precision_key precision_list infer_model_key image_dir_key benchmark_key
#   benchmark_value infer_key1 infer_value1 status_log model_name
# Globals (written):
#   IFS is set to '|' and deliberately left that way (the *_list values are
#   '|'-separated and are split by unquoted expansion); the loop variables,
#   set_* helpers, command and last_status are also left global.
# Arguments:
#   $1 - python interpreter
#   $2 - inference script (plus its fixed arguments)
#   $3 - inference model directory
#   $4 - directory that receives the per-run log files
#   $5 - image directory passed to the script
#   $6 - quantization flag (accepted for interface compatibility, unused here)
#   $7 - gpu id string, only used in the log file name
# Depends on func_set_params and status_check, which are defined outside
# this function (sourced helper file).
#######################################
function func_inference() {
    IFS='|'
    local _python="$1"
    local _script="$2"
    local _model_dir="$3"
    local _log_path="$4"
    local _img_dir="$5"
    local _flag_quant="$6"
    local _gpu="$7"
    local _save_log_path
    # inference
    for use_gpu in ${use_gpu_list[*]}; do
        if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
            for use_mkldnn in ${use_mkldnn_list[*]}; do
                for threads in ${cpu_threads_list[*]}; do
                    for batch_size in ${batch_size_list[*]}; do
                        for precision in ${precision_list[*]}; do
                            _save_log_path="${_log_path}/python_infer_cpu_gpus_${_gpu}_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
                            set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
                            set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
                            set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
                            set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
                            set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
                            set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
                            # NOTE(review): precision only affects the log name on CPU;
                            # it is not passed to the script in this branch.
                            command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
                            # The command string carries its own redirection, hence eval.
                            eval $command
                            last_status=${PIPESTATUS[0]}
                            cat "${_save_log_path}"
                            status_check $last_status "${command}" "${status_log}" "${model_name}" "${_save_log_path}"
                        done
                    done
                done
            done
        elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
            for use_trt in ${use_trt_list[*]}; do
                for precision in ${precision_list[*]}; do
                    # Skip precision "True" when TensorRT is disabled.
                    if [ ${precision} = "True" ] && [ ${use_trt} = "False" ]; then
                        continue
                    fi
                    for batch_size in ${batch_size_list[*]}; do
                        _save_log_path="${_log_path}/python_infer_gpu_gpus_${_gpu}_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
                        set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
                        set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
                        set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
                        set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}")
                        set_precision=$(func_set_params "${precision_key}" "${precision}")
                        set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} > ${_save_log_path} 2>&1 "
                        eval $command
                        last_status=${PIPESTATUS[0]}
                        cat "${_save_log_path}"
                        status_check $last_status "${command}" "${status_log}" "${model_name}" "${_save_log_path}"
                    done
                done
            done
        else
            echo "Does not support hardware other than CPU and GPU Currently!"
        fi
    done
}


# Main flow.
#   MODE = "whole_infer": optionally run the KL post-training quantization
#                         command, then run inference on its output model.
#   any other MODE:       for every gpu entry x autocast x trainer run
#                         train -> eval -> export -> inference.
if [[ ${MODE} = "whole_infer" ]]; then
    # for kl_quant
    if [ ${kl_quant_cmd_value} != "null" ] && [ ${kl_quant_cmd_value} != "False" ]; then
        echo "kl_quant"
        log_path="${LOG_PATH}/export.log"
        command="${python} ${kl_quant_cmd_value} > ${log_path} 2>&1"
        echo ${command}
        eval $command
        last_status=${PIPESTATUS[0]}
        status_check $last_status "${command}" "${status_log}" "${model_name}" "${log_path}"
        # Expose the quantized model under the inference.* file names by
        # symlinking inside the quantization output directory.
        cd ${infer_model_dir_list}/quant_post_static_model
        ln -s model.pdmodel inference.pdmodel
        ln -s model.pdiparams inference.pdiparams
        # Inference is launched from deploy/, so the model dir is given
        # relative to it ("../...").
        cd ../../deploy
        is_quant=True
        gpu=0
        func_inference "${python}" "${inference_py}" "../${infer_model_dir_list}/quant_post_static_model" "${LOG_PATH}" "${infer_img_dir}" "${is_quant}" "${gpu}"
        cd ..
    fi
else
    # From here on list values are split on '|'.
    IFS="|"
    export Count=0
    USE_GPU_KEY=(${train_use_gpu_value})
    for gpu in ${gpu_list[*]}; do
        # NOTE(review): train_use_gpu is computed per gpu entry but not used
        # below; set_use_gpu is built from train_use_gpu_value instead.
        train_use_gpu=${USE_GPU_KEY[Count]}
        Count=$(($Count + 1))
        ips=""
        # Classify the gpu entry by value/length:
        #   "-1"            -> no CUDA_VISIBLE_DEVICES export
        #   1 character     -> single device, exported immediately
        #   <= 15 chars     -> comma-separated device list; env only names
        #                      the first device and is applied before inference
        #   longer          -> "<ips>;<devices>" multi-machine entry
        if [ ${gpu} = "-1" ]; then
            env=""
        elif [ ${#gpu} -le 1 ]; then
            env="export CUDA_VISIBLE_DEVICES=${gpu}"
            eval ${env}
        elif [ ${#gpu} -le 15 ]; then
            IFS=","
            array=(${gpu})
            env="export CUDA_VISIBLE_DEVICES=${array[0]}"
            IFS="|"
        else
            IFS=";"
            array=(${gpu})
            ips=${array[0]}
            gpu=${array[1]}
            IFS="|"
            env=" "
        fi
        for autocast in ${autocast_list[*]}; do
            for trainer in ${trainer_list[*]}; do
                # Select the train/export commands that belong to this trainer.
                flag_quant=False
                if [ ${trainer} = ${pact_key} ]; then
                    run_train=${pact_trainer}
                    run_export=${pact_export}
                    flag_quant=True
                elif [ ${trainer} = "${fpgm_key}" ]; then
                    run_train=${fpgm_trainer}
                    run_export=${fpgm_export}
                elif [ ${trainer} = "${distill_key}" ]; then
                    run_train=${distill_trainer}
                    run_export=${distill_export}
                # In case of @to_static, we re-use norm_trainer,
                # but append "-o Global.to_static=True" to the config
                # to trigger the "apply_to_static" logic in 'engine.py'
                elif [ ${trainer} = "${to_static_key}" ]; then
                    run_train="${norm_trainer}  ${to_static_trainer}"
                    run_export=${norm_export}
                elif [[ ${trainer} = ${trainer_key2} ]]; then
                    run_train=${trainer_value2}
                    run_export=${export_value2}
                else
                    run_train=${norm_trainer}
                    run_export=${norm_export}
                fi

                # Trainers configured as "null" are skipped entirely.
                if [ ${run_train} = "null" ]; then
                    continue
                fi

                set_autocast=$(func_set_amp_params "${autocast_key}" "${autocast}")
                set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
                set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
                set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
                set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
                set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu_value}")
                if [ ${#ips} -le 15 ]; then
                    # Single machine. An ips string longer than 15 characters
                    # is treated as multi-machine (see else branch); 15 is the
                    # minimum length of a two-address list: 0.0.0.0,0.0.0.0
                    save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_1"
                    nodes=1
                else
                    IFS=","
                    ips_array=(${ips})
                    IFS="|"
                    nodes=${#ips_array[@]}
                    save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}"
                fi

                # load pretrain from norm training if current trainer is pact or fpgm trainer
                # if [ ${trainer} = ${pact_key} ] || [ ${trainer} = ${fpgm_key} ]; then
                #    set_pretrain="${load_norm_train_model}"
                # fi

                set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
                if [ ${#gpu} -le 2 ]; then # train with cpu or single gpu
                    cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} "
                elif [ ${#ips} -le 15 ]; then # train with multi-gpu
                    cmd="${python} -m paddle.distributed.launch --devices=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1}"
                else # train with multi-machine
                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --devices=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1}"
                fi
                # run train
                # export FLAGS_cudnn_deterministic=True
                sleep 5
                eval $cmd
                # Capture the training exit status right away: the log copy
                # below would otherwise overwrite $? and hide a failed
                # training run from status_check.
                last_status=$?
                if [[ $model_name == *GeneralRecognition* ]]; then
                    eval "cat ${save_log}/RecModel/train.log >> ${save_log}.log"
                else
                    eval "cat ${save_log}/${model_name}/train.log >> ${save_log}.log"
                fi
                status_check $last_status "${cmd}" "${status_log}" "${model_name}" "${save_log}.log"
                sleep 5

                # GeneralRecognition models store their weights below
                # RecModel/ instead of <model_name>/.
                if [[ $model_name == *GeneralRecognition* ]]; then
                    set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/RecModel/${train_model_name}")
                else
                    set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${model_name}/${train_model_name}")
                fi
                # save norm trained models to set pretrain for pact training and fpgm training
                if [[ ${trainer} = ${trainer_norm}  ||  ${trainer} = ${pact_key} ]]; then
                    load_norm_train_model=${set_eval_pretrain}
                fi
                # run eval
                if [ ${eval_py} != "null" ]; then
                    set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
                    eval_log_path="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}_eval.log"
                    eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1} > ${eval_log_path} 2>&1"
                    eval $eval_cmd
                    status_check $? "${eval_cmd}" "${status_log}" "${model_name}" "${eval_log_path}"
                    sleep 5
                fi
                # run export model
                if [ ${run_export} != "null" ]; then
                    save_infer_path="${save_log}"
                    # The export command consumes set_export_weight, so it
                    # must be set for GeneralRecognition models as well
                    # (weights live below RecModel/); otherwise the export
                    # would run with an empty or stale weight argument.
                    if [[ $model_name == *GeneralRecognition* ]]; then
                        set_export_weight=$(func_set_params "${export_weight}" "${save_log}/RecModel/${train_model_name}")
                    else
                        set_export_weight=$(func_set_params "${export_weight}" "${save_log}/${model_name}/${train_model_name}")
                    fi
                    set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}")
                    export_log_path="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}_export.log"
                    export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key} > ${export_log_path} 2>&1"
                    eval $export_cmd
                    status_check $? "${export_cmd}" "${status_log}" "${model_name}" "${export_log_path}"

                    # run inference
                    # Apply the device selection chosen for this gpu entry;
                    # inference is launched from deploy/.
                    eval $env
                    cd deploy
                    func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${infer_img_dir}" "${flag_quant}" "${gpu}"
                    cd ..
                fi
                eval "unset CUDA_VISIBLE_DEVICES"
            done # done with:    for trainer in ${trainer_list[*]}; do
        done     # done with:    for autocast in ${autocast_list[*]}; do
    done         # done with:    for gpu in ${gpu_list[*]}; do
fi               # end if [[ ${MODE} = "whole_infer" ]]; then