export FLAGS_sync_nccl_allreduce=0 export FLAGS_eager_delete_tensor_gb=1 export FLAGS_fraction_of_gpu_memory_to_use=0.1 port=$1 gpu=$2 export CUDA_VISIBLE_DEVICES=$gpu python serve.py ./infer_model $port