dist_test.sh 2.2 KB
Newer Older
1
#!/bin/bash
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

17
# Export FLAGS_rpc_disable_reuse_port=1 so every child process sees it
# (presumably a Paddle runtime flag; its exact effect is not visible here).
FLAGS_rpc_disable_reuse_port=1
export FLAGS_rpc_disable_reuse_port

# Drop any proxy settings inherited from the caller's environment.
unset http_proxy https_proxy
19 20 21 22 23 24 25 26 27 28 29 30 31 32

# Required inputs, both supplied through the environment:
#   TEST_TARGET_NAME - base name of the test; ${name}.py is the script to run
#   TEST_TIMEOUT     - time budget of the test, used to derive run_time
name=${TEST_TARGET_NAME}

if [[ -z "${name}" ]]; then
    # Name the variable literally: expanding it here would only print the
    # empty value that triggered this branch.
    echo "can't find TEST_TARGET_NAME, please set \${TEST_TARGET_NAME} first"
    exit 1
fi

if [[ -z "${TEST_TIMEOUT}" ]]; then
    echo "can't find TEST_TIMEOUT, please set \${TEST_TIMEOUT} first"
    exit 1
fi

33

34
# Remove log files left over from an earlier run of this test, so that the
# failure dump further down only shows output from the current run.
# The prefix is quoted (the glob part is not) and `--` ends option parsing, so
# a name containing spaces or starting with '-' is handled correctly.
rm -f -- "${name}"_*.log
36 37 38 39

# Time limit handed to the test process: TEST_TIMEOUT minus 10.
# NOTE(review): presumably the margin lets this script's own timeout fire
# before an outer limit of TEST_TIMEOUT does - confirm with the caller.
run_time=$(( ${TEST_TIMEOUT} - 10 ))
printf 'run_time: %s\n' "${run_time}"
40 41 42 43 44 45 46 47 48

# Pick the interpreter command line: run under `coverage` (branch coverage,
# parallel data files) only when WITH_COVERAGE is exactly "ON"; otherwise use
# plain unbuffered python. The trailing space is part of the original value.
case "${WITH_COVERAGE}" in
    ON)
        PYTHON_EXEC="python -u -m coverage run --branch -p "
        ;;
    *)
        PYTHON_EXEC="python -u "
        ;;
esac

# Run ${name}.py under a hard time limit; stdout and stderr both go to
# ${name}_run.log so they can be dumped below if the run fails.
# With -s SIGKILL a timed-out run is killed outright, and GNU timeout then
# reports 128+9 (137) rather than 124.
# PYTHON_EXEC is deliberately left unquoted: it holds a command plus its
# options and must be word-split. Everything else is quoted.
# shellcheck disable=SC2086
timeout -s SIGKILL "${run_time}" ${PYTHON_EXEC} "${name}.py" > "${name}_run.log" 2>&1
# Capture the status immediately, before any other command can overwrite $?.
exit_code=$?

# Success: nothing to diagnose.
if [[ ${exit_code} -eq 0 ]]; then
    exit 0
fi

echo "${name} faild with ${exit_code}"

56 57 58
# Snapshot of processes and sockets right after the failed run.
echo "after run ${name}"
# BSD-style `ps aux` (no dash): with procps, `ps -aux` formally means
# "-a plus processes of user x" and only works through a fallback.
ps aux
netstat -anlp
59

60 61
# paddle log
#######################################
# Print every file named "<prefix>_*.log" in the current directory, each one
# preceded by a "cat <file>" banner and shown with line numbers.
# Arguments: $1 - log file name prefix
# Outputs:   banners and numbered file contents on stdout
# Returns:   status of the last command run (0 when nothing matched)
#######################################
dump_logs() {
    local prefix=$1
    local log
    # Glob directly instead of parsing `ls`, so names with spaces survive.
    for log in "${prefix}"_*.log; do
        # An unmatched glob stays literal; skip it rather than fail in cat.
        [[ -e "${log}" ]] || continue
        # The file name is passed as data, never as the printf format string.
        printf '\ncat %s\n' "${log}"
        cat -n -- "${log}"
    done
}

echo "${name} log"
dump_logs "${name}"
67

68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
# check CUDA or ROCM env
# Select the GPU status tool that is installed: nvidia-smi first, rocm-smi as
# the fallback. `command -v` is the shell builtin replacement for `which`;
# like `which`, it prints the resolved path on success.
GPU_SYS_INFO_CMD=nvidia-smi
if ! command -v "${GPU_SYS_INFO_CMD}"; then
    GPU_SYS_INFO_CMD=rocm-smi
fi

# Neither tool found: report it and stop with the lookup's status.
command -v "${GPU_SYS_INFO_CMD}"
exit_code=$?
if [[ ${exit_code} -ne 0 ]]; then
    echo "nvidia-smi or rocm-smi faild with ${exit_code}"
    exit "${exit_code}"
fi

84 85
#display system context
# Take two snapshots, each after a 3-second pause, of the process table, the
# socket table and (when the tool is available) the GPU status.
for _ in {1..2}; do
    sleep 3
    # BSD-style `ps aux` (no dash): with procps, `ps -aux` formally means
    # "-a plus processes of user x" and only works through a fallback.
    ps aux
    netstat -anlp

    # Skip quietly when the GPU tool is not available; both output streams of
    # the lookup are discarded so a miss does not add noise to the log.
    if command -v "${GPU_SYS_INFO_CMD}" > /dev/null 2>&1; then
        "${GPU_SYS_INFO_CMD}"
    fi
done

G
gongweibao 已提交
95 96 97
# Usage of every mounted filesystem, in human-readable units.
printf '%s\n' "dist space:"
df -h

# Long listing of everything matching /tmp/paddle.*
printf '%s\n' "ls /tmp/paddle.*"
ls -l /tmp/paddle.*

# Long listing of the current working directory.
printf '%s\n' "ls -l ./"
ls -l ./

# This point is only reached after a failed run (a successful run exits 0
# earlier), so the script always ends with a non-zero status here.
exit 1