提交 797b515a 编写于 作者: T tangwei

add paddle cloud run

上级 21ddc265
#!/bin/bash
###################################################
# Usage: submit.sh
# Description: client implementation for submitting an MPI job
###################################################
# ---------------------------------------------------------------------------- #
# variable define #
# ---------------------------------------------------------------------------- #
# Settings read by _submit (job/cluster options, ak/sk credentials) and by
# main (g_user_define_script). All of them start out as empty strings; nothing
# in this file assigns them afterwards, so presumably the script sourced by
# main fills them in -- confirm.
declare \
  g_jobname="" \
  g_version="" \
  g_priority="" \
  g_nodes="" \
  g_run_cmd="" \
  g_groupname="" \
  g_config="" \
  g_submitfiles="" \
  g_ak="" \
  g_sk="" \
  g_user_define_script=""
# ---------------------------------------------------------------------------- #
#-----------------------------------------------------------------------------------------------------------------
# fun    : package_hook -- mark the run stage as "package" and call package
# param  : N/A (writes the global g_run_stage)
# return : exit status of package
#-----------------------------------------------------------------------------------------------------------------
package_hook() {
  g_run_stage="package"

  # package is not defined in this script; it has to be in scope by the time
  # this function runs.
  package
}
#-----------------------------------------------------------------------------------------------------------------
# fun    : _before_submit -- print a "before_submit" marker, then run before_submit_hook
# param  : N/A
# return : exit status of before_submit_hook
#-----------------------------------------------------------------------------------------------------------------
_before_submit() {
  printf '%s\n' "before_submit"

  # before_submit_hook is not defined in this script; it must already be in scope.
  before_submit_hook
}
#-----------------------------------------------------------------------------------------------------------------
# fun    : _after_submit -- print an "after_submit" marker, then run after_submit_hook
# param  : N/A
# return : exit status of after_submit_hook
#-----------------------------------------------------------------------------------------------------------------
_after_submit() {
  printf '%s\n' "after_submit"

  # after_submit_hook is not defined in this script; it must already be in scope.
  after_submit_hook
}
#-----------------------------------------------------------------------------------------------------------------
# fun    : _submit -- submit the training job through the paddlecloud CLI
# param  : N/A
#          reads  : g_ak, g_sk, g_jobname, g_version, g_priority, g_nodes,
#                   g_run_cmd, g_groupname, g_config, g_submitfiles
#          writes : g_run_stage (set to "submit")
# return : exit status of the paddlecloud command
#-----------------------------------------------------------------------------------------------------------------
_submit() {
  g_run_stage="submit"

  # Every value is quoted so that a start command or path containing spaces
  # reaches paddlecloud as ONE argument, and an empty value cannot make an
  # option swallow the flag that follows it.
  #
  # g_submitfiles is the one exception: it is left unquoted on purpose so that
  # it splits into one argument per file -- presumably it holds a
  # whitespace-separated file list; confirm.
  #
  # NOTE(review): --cluster-name receives the job name (g_jobname), unchanged
  # from the original; no dedicated cluster variable is declared in this
  # script. Confirm this is intended.
  # shellcheck disable=SC2086
  paddlecloud job --ak "${g_ak}" --sk "${g_sk}" train --cluster-name "${g_jobname}" \
    --job-version "${g_version}" \
    --mpi-priority "${g_priority}" \
    --mpi-wall-time 300:59:00 \
    --mpi-nodes "${g_nodes}" --is-standalone 0 \
    --mpi-memory 110Gi \
    --job-name "${g_jobname}" \
    --start-cmd "${g_run_cmd}" \
    --group-name "${g_groupname}" \
    --job-conf "${g_config}" \
    --files ${g_submitfiles} \
    --json
}
#-----------------------------------------------------------------------------------------------------------------
# fun    : submit_hook -- run the three submit phases in order:
#          _before_submit, _submit, _after_submit
# param  : N/A
# return : exit status of _after_submit (a failing earlier phase does not
#          stop the later ones)
#-----------------------------------------------------------------------------------------------------------------
submit_hook() {
  _before_submit
  _submit
  _after_submit
}
#-----------------------------------------------------------------------------------------------------------------
# fun    : main -- source the user-defined script, then run package_hook and
#          submit_hook
# param  : N/A (reads the global g_user_define_script)
# return : 1 if g_user_define_script is empty or cannot be sourced;
#          otherwise the exit status of submit_hook
#-----------------------------------------------------------------------------------------------------------------
main() {
  # Fail fast: without the user-defined script the hook functions called by
  # package_hook/submit_hook are not defined, and going on would submit a job
  # with missing hooks.
  if [[ -z "${g_user_define_script}" ]]; then
    printf '%s\n' "submit.sh: g_user_define_script is not set" >&2
    return 1
  fi

  # Quoted so a path containing spaces is sourced as a single file.
  # shellcheck source=/dev/null
  if ! source "${g_user_define_script}"; then
    printf 'submit.sh: failed to source user script: %s\n' "${g_user_define_script}" >&2
    return 1
  fi

  package_hook
  submit_hook
}
# Script entry point: run main (no command-line arguments are forwarded).
main
\ No newline at end of file
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Engine configuration selecting the "PaddleCloud" backend.
# NOTE(review): indentation appears to have been lost in this copy; as written
# every key below is top level. Presumably workspace/backend/submit nest under
# `engine:` and the remaining keys under `submit:` -- confirm before use.
engine:
workspace: "paddlerec.models.rank.dnn"
backend: "PaddleCloud"
# Job submission settings.
submit:
# NOTE(review): sk/ak look like real credentials committed in plain text --
# if so, rotate them and load them from the environment or a secret store.
sk: "2907445932295c57bffab28676fc5a66"
ak: "55cb2027d7a05b9f8b213de483698cc8"
version: "paddle-fluid-v1.7.1"
priority: "high"
jobname: "paddlerec_ctr_dnn"
group: "paddle_benchmark"
cluster: "SZWG-SUNWAY"
# "{workspace}" is presumably substituted with the workspace path by whatever
# loads this file -- confirm.
config: "{workspace}/config.ini"
nodes: 10
# NOTE(review): the key is spelled "scrpit"; confirm the consumer reads this
# exact spelling before renaming it to "submit_script".
submit_scrpit: "{workspace}/submit.sh"
# before_hook.sh: placeholder that only prints a start marker and an end marker.
printf '%s\n' "Run before_hook.sh ..."
printf '%s\n' "End before_hook.sh ..."
# Type of storage cluster.
storage_type="hdfs"
# Attention: the files used for training must be placed on HDFS.
# NOTE(review): force_reuse_output_path presumably allows writing into an
# output path that already exists -- confirm.
force_reuse_output_path="True"
# File-system endpoint. NOTE(review): fs_ugi presumably holds
# "<user>,<password>" for that file system -- confirm, and avoid committing
# real credentials.
fs_name="afs://yinglong.afs.baidu.com:9902"
fs_ugi="paddle,paddle"
# NOTE(review): unit of FLAGS_rpc_deadline is not stated here (presumably
# milliseconds) -- confirm.
FLAGS_rpc_deadline=300000
# Training data path on HDFS.
train_data_path="/user/paddle/benchmark/ctr/train_data_paddle"
thirdparty_path="/user/paddle/benchmark/ctr/thirdparty"
# Output directory on HDFS.
output_path="/user/paddle/ly"
# Test data path on HDFS.
test_data_path="/user/paddle/benchmark/ctr/test_data"
# Runtime settings (values are passed through as written).
CPU_NUM=16
GLOG_v=0
FLAGS_communicator_fake_rpc=0
FLAGS_rpc_retry_times=0
#!/bin/bash
###################################################
# Usage: submit.sh
# Description: client implementation for submitting an MPI job
###################################################
#-----------------------------------------------------------------------------------------------------------------
# fun    : before_submit_hook -- hook executed before submitting to the cluster;
#          this implementation only prints a "before_submit" marker
# param  : N/A
# return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
before_submit_hook() {
  printf '%s\n' "before_submit"
}
#-----------------------------------------------------------------------------------------------------------------
# fun    : after_submit_hook -- hook executed after submitting to the cluster;
#          this implementation only prints an "after_submit" marker
# param  : N/A
# return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
after_submit_hook() {
  printf '%s\n' "after_submit"
}
#-----------------------------------------------------------------------------------------------------------------
# fun    : package -- packaging step for the cluster job; this implementation
#          only prints a "package" marker
# param  : N/A
# return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
package() {
  printf '%s\n' "package"
}
......@@ -54,8 +54,6 @@ function env_prepare() {
export PATH=$PYTHONPATH/bin:$PATH
export LIBRARY_PATH=$PYTHONROOT/lib:$LIBRARY_PATH
python -c "print('heheda')"
mpirun -npernode 1 python/bin/python -m pip uninstall -y paddle-rec
mpirun -npernode 1 python/bin/python -m pip install whl/fleet_rec-0.0.2-py2-none-any.whl --index-url=http://pip.baidu.com/pypi/simple --trusted-host pip.baidu.com
check_error
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册