#!/bin/bash

###################################################
# Usage: submit.sh
# Description: MPI job submit client implementation
###################################################

#-----------------------------------------------------------------------------------------------------------------
#fun : get arguments from env and set them into variables
#      (placeholder: the current body only prints "xx"; no variable is read or set yet)
#param : N/A
#return : 0 -- success; not 0 -- failure (exit status of echo)
#-----------------------------------------------------------------------------------------------------------------
function vars_get_from_env() {
  echo "xx"
}

#-----------------------------------------------------------------------------------------------------------------
#fun : package -- stage everything the job needs under ${engine_temp_path}:
#        ${engine_worker}             -> ${engine_temp_path}/
#        ${engine_package_python}/*   -> ${engine_temp_path}/python/
#        ${engine_package_paddlerec}  -> ${engine_temp_path}/whl/
#      Sets the global g_run_stage to "package".
#param : N/A (reads engine_temp_path, engine_worker, engine_package_python, engine_package_paddlerec)
#return : 0 -- success; not 0 -- failure (empty engine_temp_path, or a mkdir/cp step failed)
#-----------------------------------------------------------------------------------------------------------------
function package() {
  g_run_stage="package"

  local staging_dir="${engine_temp_path:-}"

  # An empty staging path would make the steps below target /python and /whl.
  if [[ -z "${staging_dir}" ]]; then
    echo "package: engine_temp_path is empty" >&2
    return 1
  fi
  echo "package temp dir: " "${staging_dir}"

  # -p keeps a re-run from failing when the directories already exist.
  mkdir -p -- "${staging_dir}" "${staging_dir}/python" "${staging_dir}/whl" || return 1

  cp -- "${engine_worker}" "${staging_dir}" || return 1
  echo "copy job.sh from " "${engine_worker}" " to " "${staging_dir}"

  # The glob stays outside the quotes so it still expands to the directory's entries.
  cp -r -- "${engine_package_python}"/* "${staging_dir}/python/" || return 1
  echo "copy python from " "${engine_package_python}" " to " "${staging_dir}"

  cp -- "${engine_package_paddlerec}" "${staging_dir}/whl/" || return 1
  echo "copy " "${engine_package_paddlerec}" " to " "${staging_dir}/whl/"
}

#-----------------------------------------------------------------------------------------------------------------
#fun : hook run before submitting to the cluster
#      (placeholder: the current body only prints "before_submit")
#param : N/A
#return : 0 -- success; not 0 -- failure (exit status of echo)
#-----------------------------------------------------------------------------------------------------------------
function before_submit() {
  echo "before_submit"
}

#-----------------------------------------------------------------------------------------------------------------
#fun : hook run after submitting to the cluster
#      (placeholder: the current body only prints "after_submit")
#param : N/A
#return : 0 -- success; not 0 -- failure (exit status of echo)
#-----------------------------------------------------------------------------------------------------------------
function after_submit() {
  echo "after_submit"
}

#-----------------------------------------------------------------------------------------------------------------
#fun : submit to cluster -- runs ${engine_submit_hpc}/bin/qsub_f with the job name, the
#      engine_* settings and the job entry script as arguments.
#      Sets the globals g_run_stage ("submit"), g_job_name ("paddle_rec_mpi") and
#      g_job_entry ("worker.sh"); g_hdfs_path keeps its current value.
#param : N/A (reads engine_submit_hpc, engine_submit_qconf, engine_hdfs_name, engine_hdfs_ugi,
#             engine_hdfs_output, engine_temp_path, engine_submit_nodes)
#return : 0 -- success; not 0 -- failure (exit status of qsub_f)
#-----------------------------------------------------------------------------------------------------------------
function submit() {
  g_run_stage="submit"

  g_job_name="paddle_rec_mpi"
  # Self-assignment kept from the original; ':-' makes it safe when the variable is unset.
  g_hdfs_path="${g_hdfs_path:-}"

  g_job_entry="worker.sh"

  # The original wrote ${$engine_submit_hpc}, which bash rejects as a bad substitution,
  # so qsub_f was never executed. qsub_f is the last command here, so its exit status
  # is what the function returns.
  "${engine_submit_hpc}/bin/qsub_f" \
    -N "${g_job_name}" \
    --conf "${engine_submit_qconf}" \
    --hdfs "${engine_hdfs_name}" \
    --ugi "${engine_hdfs_ugi}" \
    --hout "${engine_hdfs_output}" \
    --files "${engine_temp_path}" \
    -l "nodes=${engine_submit_nodes},walltime=1000:00:00,resource=full" \
    "${g_job_entry}"
}

#-----------------------------------------------------------------------------------------------------------------
#fun : entry point -- runs package, before_submit, submit, after_submit in that order,
#      stopping at the first stage that fails
#param : N/A
#return : 0 -- success; not 0 -- failure (exit status of the first failing stage)
#-----------------------------------------------------------------------------------------------------------------
function main() {
  # A bare 'return' passes on the failing stage's status, so nothing is submitted
  # when packaging or the pre-submit hook has failed.
  package || return

  before_submit || return
  submit || return
  after_submit || return
}