#!/usr/bin/env bash

# Abort the script as soon as an unhandled command fails.
set -e

# Print the installer's failure diagnostic.
# Arguments: $1 - line number on which the failing command was detected
# Outputs:   two lines on stderr, so the diagnostic never mixes with
#            regular progress output on stdout
err_report() {
    echo "Error on line $1" >&2
    echo "Fail to install deepspeed" >&2
}
# Single quotes delay expansion so $LINENO is evaluated when the trap fires.
trap 'err_report $LINENO' ERR

# Print the command-line help text.
# Arguments: none
# Outputs:   the help text on stdout
usage() {
  echo "
Usage: install.sh [options...]

By default will install deepspeed and all third party dependencies across all machines listed in
hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally

[optional]
    -l, --local_only        Install only on local machine
    -s, --pip_sudo          Run pip install with sudo (default: no sudo)
    -r, --allow_sudo        Allow script to be run by root (probably don't want this, instead use --pip_sudo)
    -n, --no_clean          Do not clean prior build state, by default prior build files are removed before building wheels
    -m, --pip_mirror        Use the specified pip mirror (default: the default pip mirror)
    -H, --hostfile          Path to MPI-style hostfile (default: /job/hostfile)
    -e, --examples          Checkout deepspeed example submodule (no install)
    -v, --verbose           Verbose logging
    -h, --help              This help text
  "
}

# Default values for the installer options; the argument parser below
# overrides the ones that have a command-line switch.
# NOTE(review): ds_only, entire_dlts_job and skip_requirements are not read
# anywhere in the visible script - confirm before removing them.
ds_only=0
local_only=0
pip_sudo=0
entire_dlts_job=1
hostfile=/job/hostfile
pip_mirror=""
skip_requirements=0
allow_sudo=0
no_clean=0
verbose=0
examples=0

# Parse the installer's command-line options into the global option variables.
# Globals:   local_only, pip_sudo, pip_mirror, verbose, allow_sudo, no_clean,
#            hostfile, examples (written)
# Arguments: the script's command-line arguments
# Outputs:   help text / error messages for -h, unknown or incomplete options
# Exits:     0 after printing help; 1 on an unknown option, an option that is
#            missing its value, or a user-provided hostfile that does not exist
parse_args() {
    local key
    while [[ $# -gt 0 ]]; do
        key="$1"
        case "$key" in
            -l|--local_only)
                local_only=1
                shift
                ;;
            -s|--pip_sudo)
                pip_sudo=1
                shift
                ;;
            -m|--pip_mirror)
                # Without this check a trailing "-m" would hit a failing
                # `shift` and abort with an unrelated error message.
                if [[ $# -lt 2 ]]; then
                    echo "Option $key requires an argument" >&2
                    usage
                    exit 1
                fi
                pip_mirror=$2
                shift 2
                ;;
            -v|--verbose)
                verbose=1
                shift
                ;;
            -r|--allow_sudo)
                allow_sudo=1
                shift
                ;;
            -n|--no_clean)
                no_clean=1
                shift
                ;;
            -H|--hostfile)
                if [[ $# -lt 2 ]]; then
                    echo "Option $key requires an argument" >&2
                    usage
                    exit 1
                fi
                hostfile=$2
                # Quoted so that paths containing whitespace are tested as one word.
                if [[ ! -f "$hostfile" ]]; then
                    echo "User-provided hostfile does not exist at $hostfile, exiting"
                    exit 1
                fi
                shift 2
                ;;
            -e|--examples)
                examples=1
                shift
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                echo "Unknown argument(s)"
                usage
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# Refuse to run as root unless --allow_sudo was given; the message points the
# user at --pip_sudo instead.
user=$(whoami)
if [[ "$allow_sudo" == "0" && "$user" == "root" ]]; then
    echo "WARNING: running as root, if you want to install DeepSpeed with sudo please use -s/--pip_sudo instead"
    usage
    exit 1
fi

# --examples: only initialise/update the git submodules, then stop without
# building or installing anything.
if [[ "$examples" == "1" ]]; then
    git submodule update --init --recursive
    exit 0
fi

# Derive the flags later passed to rm / setup.py / pip from the verbose switch.
# Verbose: VERBOSE is "-v" and PIP_VERBOSE is empty.
# Quiet:   VERBOSE is empty and pip's version check is disabled.
if [[ "$verbose" == "1" ]]; then
    VERBOSE="-v"
    PIP_VERBOSE=""
else
    VERBOSE=""
    PIP_VERBOSE="--disable-pip-version-check"
fi

# Remove a regular file or a directory tree if it exists; do nothing otherwise.
# Globals:   VERBOSE (read) - passed to rm as an extra flag when non-empty
# Arguments: $1 - path to remove
# Outputs:   "Attempting to remove <path>" on stdout (plus rm's own output
#            when VERBOSE is set)
# Returns:   rm's status when something was removed, 0 when the path is absent
rm_if_exist() {
    echo "Attempting to remove $1"
    # The path is quoted so names containing whitespace or glob characters are
    # handled as a single word; "--" protects names that start with a dash.
    if [[ -f "$1" ]]; then
        rm ${VERBOSE:+"$VERBOSE"} -- "$1"
    elif [[ -d "$1" ]]; then
        rm -rf ${VERBOSE:+"$VERBOSE"} -- "$1"
    fi
}

# Unless --no_clean was given, drop leftovers of earlier builds before the
# wheel is built.
if [[ "$no_clean" == "0" ]]; then
    # remove deepspeed build files
    rm_if_exist deepspeed/git_version_info_installed.py
    rm_if_exist dist
    rm_if_exist build
    rm_if_exist deepspeed.egg-info
fi

# Command prefix placed in front of pip invocations: "sudo -H" when
# --pip_sudo was given, empty otherwise.
if [[ "$pip_sudo" == "1" ]]; then
    PIP_SUDO="sudo -H"
else
    PIP_SUDO=""
fi

# Assemble the pip install command line as a single string; it is expanded
# unquoted later so that it splits into separate words. When a mirror was
# requested it is appended as the package index via -i.
PIP_INSTALL="pip install $VERBOSE $PIP_VERBOSE"
if [[ -n "$pip_mirror" ]]; then
    PIP_INSTALL="$PIP_INSTALL -i $pip_mirror"
fi

# Fall back to a local-only install when the hostfile is missing.
# The expansion is quoted: unquoted, an empty $hostfile turns the test into
# `[ ! -f ]`, which is false, so the fallback would be skipped.
if [[ ! -f "$hostfile" ]]; then
    echo "No hostfile exists at $hostfile, installing locally"
    local_only=1
fi

# Build the wheel into dist/ using the project's setup.py.
echo "Building deepspeed wheel"
# $VERBOSE is left unquoted on purpose: when empty it must expand to no
# argument at all rather than to an empty string argument.
python setup.py $VERBOSE bdist_wheel

# Install the freshly built wheel, either on this machine only or on every
# host listed in the hostfile (via pdsh/pdcp over ssh).
if [[ "$local_only" == "1" ]]; then
    echo "Installing deepspeed"
#    $PIP_SUDO pip uninstall -y deepspeed
    # PIP_SUDO / PIP_INSTALL hold whole command prefixes and the wheel name is
    # a glob, so all three are intentionally expanded unquoted.
    $PIP_SUDO $PIP_INSTALL dist/deepspeed*.whl
    ds_report
else
    # Quoted test: unquoted, an empty $hostfile makes `[ -f ]` evaluate true.
    if [[ ! -f "$hostfile" ]]; then
        echo "hostfile not found, cannot proceed"
        exit 1
    fi
    # First column of every non-blank, non-comment line, joined with commas
    # into the host list format expected by pdsh/pdcp -w.
    hosts=$(awk 'NF && $1 !~ /^#/ {print $1}' "$hostfile" | paste -sd "," -)
    if [[ -z "$hosts" ]]; then
        echo "hostfile $hostfile lists no hosts, cannot proceed" >&2
        exit 1
    fi
    export PDSH_RCMD_TYPE=ssh
    # NOTE(review): fixed, predictable path under /tmp on every remote host;
    # kept because all hosts must agree on the same location.
    tmp_wheel_path="/tmp/deepspeed_wheels"

    # Start from an empty staging directory on every host; -f keeps rm quiet
    # when the directory already exists but is empty.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*; else mkdir -pv $tmp_wheel_path; fi"
    pdcp -w "$hosts" requirements/requirements.txt ${tmp_wheel_path}/

    echo "Installing deepspeed"
    pdsh -w "$hosts" "$PIP_SUDO pip uninstall -y deepspeed"
    pdcp -w "$hosts" dist/deepspeed*.whl $tmp_wheel_path/
    pdsh -w "$hosts" "$PIP_SUDO $PIP_INSTALL $tmp_wheel_path/deepspeed*.whl"
    pdsh -w "$hosts" "ds_report"
    # Remove the staged wheel and requirements file, then the directory itself.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl; rm -f $tmp_wheel_path/*.txt; rmdir $tmp_wheel_path; fi"
fi