worker.sh 2.4 KB
Newer Older
T
tangwei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
#!/bin/bash

###################################################
# Usage: worker.sh
# Description: prepare the environment and run the worker job on each node via mpirun
###################################################

# ---------------------------------------------------------------------------- #
#                            variable define                                   #
# ---------------------------------------------------------------------------- #
# Script-wide mutable state, all initialised to the empty string.
# g_run_stage is assigned the name of the stage currently executing.
declare g_curPath='' g_scriptName='' g_workPath='' g_run_stage=''

# ---------------------------------------------------------------------------- #
#                             const define                                     #
# ---------------------------------------------------------------------------- #
# Communicator tuning flags, declared readonly.
#
# FLAGS_communicator_thread_pool_size used to be declared twice (5, then 20).
# Bash rejects a second assignment to a readonly variable, so the value that
# actually took effect was 5; that effective value is kept here.
# TODO(review): confirm whether 20 was the intended value.
#
# NOTE(review): `declare -r` does not export, so processes started by this
# script do not inherit these variables unless they are exported elsewhere.
declare -r FLAGS_communicator_thread_pool_size=5
declare -r FLAGS_communicator_send_queue_size=18
declare -r FLAGS_communicator_max_merge_var_num=18
################################################################################

#-----------------------------------------------------------------------------------------------------------------
#fun : check function return code
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function check_error() {
  # Must be called immediately after the command being checked: the status is
  # read from $? before anything else in this function can overwrite it.
  local -r rc=$?

  if (( rc != 0 )); then
    # Report the failing stage (taken from the global g_run_stage) on stderr
    # and abort the whole script with status 1.
    printf 'execute %s raise exception! please check ...\n' "${g_run_stage}" >&2
    exit 1
  fi
}

#-----------------------------------------------------------------------------------------------------------------
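#fun : prepare the environment: move package contents into the working directory and install the fleet_rec wheel
#param : N/A
#return : 0 -- success; exits with 1 if the wheel installation fails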
#-----------------------------------------------------------------------------------------------------------------
function env_prepare() {
  # Stage 1: lay out the shipped package in the working directory, extend
  # LIBRARY_PATH with the bundled python/lib, and install the fleet_rec wheel.
  # Globals written: g_run_stage, WORKDIR, LIBRARY_PATH (exported).
  g_run_stage="env_prepare"

  WORKDIR=$(pwd)
  # The glob is intentionally unquoted so it expands to the package contents.
  # The status of this move is not checked here; if it fails, the bundled
  # interpreter is missing and the pip step below fails and is caught instead.
  mpirun -npernode 1 mv package/* ./
  echo "current:${WORKDIR}"
  export LIBRARY_PATH="${WORKDIR}/python/lib:${LIBRARY_PATH}"

  # stdout of pip is discarded; stderr is left visible for diagnostics.
  mpirun -npernode 1 python/bin/python -m pip install \
    whl/fleet_rec-0.0.2-py2-none-any.whl \
    --index-url=http://pip.baidu.com/pypi/simple \
    --trusted-host pip.baidu.com >/dev/null
  check_error
}

T
tangwei 已提交
53 54 55 56
function run() {
  # Stage 2: launch the fleetrec worker processes through mpirun on the hosts
  # listed in the file named by PBS_NODEFILE.
  # Globals written: g_run_stage. Environment read: PBS_NODEFILE.
  echo "run"
  g_run_stage="run"

  # Fail early with a clear message instead of handing mpirun an empty
  # machinefile argument.
  : "${PBS_NODEFILE:?PBS_NODEFILE must be set}"

  mpirun -npernode 2 -timestamp-output -tag-output \
    -machinefile "${PBS_NODEFILE}" \
    python/bin/python -u -m fleetrec.run -m fleetrec.models.rank.dnn \
    --engine cluster --role worker
  # Report a failed launch the same way env_prepare does.
  check_error
}

function main() {
  # Entry point: prepare the environment, then start the job.
  # The return status is that of run, the last stage executed.
  env_prepare
  run
}

# Forward the script's arguments to main (currently unused by main).
main "$@"