提交 16966dc5 作者: tangwei

add qsub submit

上级 12f161d6
...@@ -58,6 +58,10 @@ class ClusterEngine(Engine): ...@@ -58,6 +58,10 @@ class ClusterEngine(Engine):
role = envs.get_runtime_environ("engine_role") role = envs.get_runtime_environ("engine_role")
if role == "MASTER": if role == "MASTER":
worker_script = {}
worker_script["engine_worker"] = self.job_script
envs.set_runtime_environs(worker_script)
self.start_master_procs() self.start_master_procs()
elif role == "WORKER": elif role == "WORKER":
......
...@@ -16,6 +16,11 @@ engine: ...@@ -16,6 +16,11 @@ engine:
workspace: "fleetrec.models.rank.dnn" workspace: "fleetrec.models.rank.dnn"
backend: "MPI" backend: "MPI"
hdfs:
name: "hdfs://nmg01-taihang-hdfs.dmop.baidu.com:54310"
ugi: "<user>,<password>"  # credential redacted; supply via a private, uncommitted config
output: "/app/ecom/fcr/fanyabo/wadstyleimageq/tangwei12/output_1/"
package: package:
build_script: "{workspace}/package.sh" build_script: "{workspace}/package.sh"
python: "/home/tangwei/fleet_rec_env/cpython-2.7.11-ucs4" python: "/home/tangwei/fleet_rec_env/cpython-2.7.11-ucs4"
...@@ -23,11 +28,7 @@ engine: ...@@ -23,11 +28,7 @@ engine:
submit: submit:
hpc: "/home/tangwei/submit-tieba/smart_client/" hpc: "/home/tangwei/submit-tieba/smart_client/"
hdfs: "xx" qconf: "/home/tangwei/Plines/imageq/package/my_conf/para.conf"
hout: "xxx"
ugi: "xxxx"
nodes: 10 nodes: 10
before_hook: ""
end_hook: ""
scrpit: "{workspace}/submit.sh" scrpit: "{workspace}/submit.sh"
\ No newline at end of file
...@@ -22,6 +22,19 @@ function vars_get_from_env() { ...@@ -22,6 +22,19 @@ function vars_get_from_env() {
function package() { function package() {
g_run_stage="package" g_run_stage="package"
temp=${engine_temp_path}
echo "package temp dir: " ${temp}
cp ${engine_worker} ${temp}
echo "copy job.sh from " ${engine_worker} " to " ${temp}
mkdir ${temp}/python
cp -r ${engine_package_python}/* ${temp}/python/
echo "copy python from " ${engine_package_python} " to " ${temp}
mkdir ${temp}/whl
cp ${engine_package_paddlerec} ${temp}/whl/
echo "copy " ${engine_package_paddlerec} " to " ${temp}"/whl/"
} }
#----------------------------------------------------------------------------------------------------------------- #-----------------------------------------------------------------------------------------------------------------
...@@ -50,20 +63,26 @@ function after_submit() { ...@@ -50,20 +63,26 @@ function after_submit() {
function submit() { function submit() {
g_run_stage="submit" g_run_stage="submit"
before_submit g_job_name="paddle_rec_mpi"
g_hdfs_path=$g_hdfs_path
g_job_entry="worker.sh"
${g_hpc_path}/bin/qsub_f \ ${engine_submit_hpc}/bin/qsub_f \
-N ${g_job_name} \ -N ${g_job_name} \
--conf ${g_qsub_conf} \ --conf ${engine_submit_qconf} \
--hdfs ${g_hdfs_path} \ --hdfs ${engine_hdfs_name} \
--ugi ${g_hdfs_ugi} \ --ugi ${engine_hdfs_ugi} \
--hout ${g_hdfs_output} \ --hout ${engine_hdfs_output} \
--files ${g_submit_package} \ --files ${engine_temp_path} \
-l nodes=${g_job_nodes},walltime=1000:00:00,resource=full ${g_job_entry} -l nodes=${engine_submit_nodes},walltime=1000:00:00,resource=full ${g_job_entry}
after_submit
} }
function main() { function main() {
echo "run submit done" package
before_submit
submit
after_submit
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册