From 4cdab80b718c7d835e2ed1286dbcbfb44322e663 Mon Sep 17 00:00:00 2001 From: tangwei Date: Thu, 14 May 2020 21:17:18 +0800 Subject: [PATCH] remove cluster example --- example/__init__.py | 0 example/cloud/backend.yaml | 30 ------------ example/cloud/before_hook.sh | 3 -- example/cloud/config.ini | 17 ------- example/cloud/config.yaml | 34 -------------- example/cloud/job.sh | 17 ------- example/cloud/submit.sh | 43 ----------------- example/mpi/__init__.py | 0 example/mpi/backend.yaml | 35 -------------- example/mpi/job.sh | 73 ----------------------------- example/mpi/submit.sh | 90 ------------------------------------ 11 files changed, 342 deletions(-) delete mode 100644 example/__init__.py delete mode 100755 example/cloud/backend.yaml delete mode 100644 example/cloud/before_hook.sh delete mode 100644 example/cloud/config.ini delete mode 100755 example/cloud/config.yaml delete mode 100644 example/cloud/job.sh delete mode 100644 example/cloud/submit.sh delete mode 100644 example/mpi/__init__.py delete mode 100755 example/mpi/backend.yaml delete mode 100644 example/mpi/job.sh delete mode 100644 example/mpi/submit.sh diff --git a/example/__init__.py b/example/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/example/cloud/backend.yaml b/example/cloud/backend.yaml deleted file mode 100755 index 2eff4222..00000000 --- a/example/cloud/backend.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -engine: - workspace: "/home/tangwei/fleet_rec_env/paddlerec/example/cloud" - backend: "PaddleCloud" - - submit: - sk: "2907445932295c57bffab28676fc5a66" - ak: "55cb2027d7a05b9f8b213de483698cc8" - version: "paddle-fluid-v1.7.1" - priority: "high" - jobname: "paddlerec_ctr_dnn" - group: "paddle_benchmark" - cluster: "SZWG-SUNWAY" - config: "{workspace}/config.ini" - nodes: 8 - - submit_scrpit: "{workspace}/submit.sh" diff --git a/example/cloud/before_hook.sh b/example/cloud/before_hook.sh deleted file mode 100644 index 35f8dfa0..00000000 --- a/example/cloud/before_hook.sh +++ /dev/null @@ -1,3 +0,0 @@ -echo "Run before_hook.sh ..." -../python27-gcc482/bin/python ../python27-gcc482/bin/pip install ./thirdparty/paddle_rec-0.0.2-py2-none-any.whl --index-url=http://pip.baidu.com/pypi/simple --trusted-host pip.baidu.com -echo "End before_hook.sh ..." diff --git a/example/cloud/config.ini b/example/cloud/config.ini deleted file mode 100644 index 4baa49dc..00000000 --- a/example/cloud/config.ini +++ /dev/null @@ -1,17 +0,0 @@ -#type of storage cluster -storage_type="hdfs" -#attention: files for training should be put on hdfs -force_reuse_output_path="True" - -fs_name="afs://yinglong.afs.baidu.com:9902" -fs_ugi="paddle,paddle" - -thirdparty_path="/user/paddle/benchmark/ctr/thirdparty" - -#train data path on hdfs -train_data_path="/user/paddle/benchmark/ctr/train_data_paddle/part_1*" -#test data path on hdfs -#test_data_path="/user/paddle/benchmark/ctr/test_data" - -#the output directory on hdfs -output_path="/user/paddle/ly" \ No newline at end of file diff --git a/example/cloud/config.yaml b/example/cloud/config.yaml deleted file mode 100755 index 8cec449b..00000000 --- a/example/cloud/config.yaml +++ /dev/null @@ -1,34 +0,0 @@ -train: - trainer: - # for cluster training - strategy: "async" - - epochs: 10 - workspace: "paddlerec.models.rank.dnn" - - reader: - batch_size: 512 - class: "{workspace}/../criteo_reader.py" - train_data_path: "train_data" - reader_debug_mode: False - - model: - models: "{workspace}/model.py" - hyper_parameters: - sparse_inputs_slots: 27 - sparse_feature_number: 1000001 - sparse_feature_dim: 10 - dense_input_dim: 13 - fc_sizes: [400, 400, 400] - learning_rate: 0.0001 - optimizer: adam - - save: - increment: - dirname: "increment" - epoch_interval: 2 - save_last: True - inference: - dirname: "inference" - epoch_interval: 4 - save_last: True diff --git a/example/cloud/job.sh b/example/cloud/job.sh deleted file mode 100644 index 5f001a6f..00000000 --- a/example/cloud/job.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -################################################### -# Usage: job.sh -# Description: run mpi job clinet implement -################################################### - - -# ---------------------------------------------------------------------------- # -# variable define # -# ---------------------------------------------------------------------------- # -export CPU_NUM=16 -export GLOG_v=0 -export FLAGS_rpc_deadline=300000 -# ---------------------------------------------------------------------------- # - -python -m paddlerec.run -m paddle_rec_config.yaml -e cluster -r worker diff --git a/example/cloud/submit.sh b/example/cloud/submit.sh deleted file mode 100644 index 9a5cdd8c..00000000 --- a/example/cloud/submit.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -################################################### -# Usage: submit.sh -# Description: run mpi submit clinet implement -################################################### - -g_package_files="" - -#----------------------------------------------------------------------------------------------------------------- -#fun : before hook submit to cluster -#param : N/A -#return : 0 -- success; not 0 -- failure -#----------------------------------------------------------------------------------------------------------------- -function before_submit_hook() { - echo "before_submit" -} - -#----------------------------------------------------------------------------------------------------------------- -#fun : after hook submit to cluster -#param : N/A -#return : 0 -- success; not 0 -- failure -#----------------------------------------------------------------------------------------------------------------- -function after_submit_hook() { - echo "after_submit" -} - -#----------------------------------------------------------------------------------------------------------------- -#fun : package to cluster -#param : N/A -#return : 0 -- success; not 0 -- failure -#----------------------------------------------------------------------------------------------------------------- -function package() { - echo "package" - temp=${engine_temp_path} - - cp ${engine_workspace}/job.sh ${temp} - cp ${engine_workspace}/before_hook.sh ${temp} - cp ${engine_run_config} ${temp}/paddle_rec_config.yaml - - g_submitfiles="job.sh before_hook.sh paddle_rec_config.yaml" - g_run_cmd="sh job.sh" -} diff --git a/example/mpi/__init__.py b/example/mpi/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/example/mpi/backend.yaml b/example/mpi/backend.yaml deleted file mode 100755 index 77384db4..00000000 --- a/example/mpi/backend.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -engine: - workspace: "paddlerec.models.rank.dnn" - backend: "MPI" - - hdfs: - name: "hdfs://nmg01-taihang-hdfs.dmop.baidu.com:54310" - ugi: "fcr,SaK2VqfEDeXzKPor" - output: "/app/ecom/fcr/fanyabo/wadstyleimageq/tangwei12/output_1/" - - package: - build_script: "{workspace}/package.sh" - python: "/home/tangwei/fleet_rec_env/cpython-2.7.11-ucs4" - paddlerec: "/home/tangwei/fleet_rec_env/PaddleRec" - - submit: - hpc: "/home/tangwei/Plines/client/smart_client_khan/" - qconf: "/home/tangwei/Plines/imageq/qsub_f.conf" - nodes: 10 - - submit_scrpit: "{workspace}/submit.sh" - job_scrpit: "{workspace}/worker.sh" diff --git a/example/mpi/job.sh b/example/mpi/job.sh deleted file mode 100644 index 4362901b..00000000 --- a/example/mpi/job.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -################################################### -# Usage: job.sh -# Description: run job on mpi per node -################################################### - -# ---------------------------------------------------------------------------- # -# variable define # -# ---------------------------------------------------------------------------- # -declare g_curPath="" -declare g_scriptName="" -declare g_workPath="" -declare g_run_stage="" - -# ---------------------------------------------------------------------------- # -# const define # -# ---------------------------------------------------------------------------- # -export FLAGS_communicator_thread_pool_size=5 -export FLAGS_communicator_send_queue_size=18 -export FLAGS_communicator_thread_pool_size=20 -export FLAGS_communicator_max_merge_var_num=18 -################################################################################ - -#----------------------------------------------------------------------------------------------------------------- -#fun : check function return code -#param : N/A -#return : 0 -- success; not 0 -- failure -#----------------------------------------------------------------------------------------------------------------- -function check_error() { - if [ ${?} -ne 0 ]; then - echo "execute " + $g_run_stage + " raise exception! please check ..." - exit 1 - fi -} - -#----------------------------------------------------------------------------------------------------------------- -#fun : check function return code -#param : N/A -#return : 0 -- success; not 0 -- failure -#----------------------------------------------------------------------------------------------------------------- -function env_prepare() { - g_run_stage="env_prepare" - WORKDIR=$(pwd) - mpirun -npernode 1 mv package/* ./ - echo "current:"$WORKDIR - - mpirun -npernode 1 tar -zxvf python.tar.gz > /dev/null - - export PYTHONPATH=$WORKDIR/python/ - export PYTHONROOT=$WORKDIR/python/ - export LIBRARY_PATH=$PYTHONPATH/lib:$LIBRARY_PATH - export LD_LIBRARY_PATH=$PYTHONPATH/lib:$LD_LIBRARY_PATH - export PATH=$PYTHONPATH/bin:$PATH - export LIBRARY_PATH=$PYTHONROOT/lib:$LIBRARY_PATH - - mpirun -npernode 1 python/bin/python -m pip uninstall -y paddle-rec - mpirun -npernode 1 python/bin/python -m pip install whl/fleet_rec-0.0.2-py2-none-any.whl --index-url=http://pip.baidu.com/pypi/simple --trusted-host pip.baidu.com - check_error -} - -function run() { - echo "run" - g_run_stage="run" - mpirun -npernode 2 -timestamp-output -tag-output -machinefile ${PBS_NODEFILE} python/bin/python -u -m paddlerec.run -m paddlerec.models.rank.dnn --engine cluster --role worker -} - -function main() { - env_prepare - run -} - -main diff --git a/example/mpi/submit.sh b/example/mpi/submit.sh deleted file mode 100644 index 33eba839..00000000 --- a/example/mpi/submit.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/bash - -################################################### -# Usage: submit.sh -# Description: run mpi submit clinet implement -################################################### - -#----------------------------------------------------------------------------------------------------------------- -#fun : get argument from env, set it into variables -#param : N/A -#return : 0 -- success; not 0 -- failure -#----------------------------------------------------------------------------------------------------------------- -function vars_get_from_env() { - echo "xx" -} - -#----------------------------------------------------------------------------------------------------------------- -#fun : package -#param : N/A -#return : 0 -- success; not 0 -- failure -#----------------------------------------------------------------------------------------------------------------- -function package() { - g_run_stage="package" - - temp=${engine_temp_path} - echo "package temp dir: " ${temp} - - cp ${engine_job_scrpit} ${temp} - cp ${engine_submit_qconf} ${temp} - echo "copy job.sh from " ${engine_worker} " to " ${temp} - - mkdir -p ${temp}/package - cp -r ${engine_package_python} ${temp}/package/ - echo "copy python from " ${engine_package_python} " to " ${temp} - - mkdir ${temp}/package/whl - cp ${engine_package_paddlerec} ${temp}/package/whl/ - echo "copy " ${engine_package_paddlerec} " to " ${temp}"/whl/" -} - -#----------------------------------------------------------------------------------------------------------------- -#fun : before hook submit to cluster -#param : N/A -#return : 0 -- success; not 0 -- failure -#----------------------------------------------------------------------------------------------------------------- -function before_submit() { - echo "before_submit" -} - -#----------------------------------------------------------------------------------------------------------------- -#fun : after hook submit to cluster -#param : N/A -#return : 0 -- success; not 0 -- failure -#----------------------------------------------------------------------------------------------------------------- -function after_submit() { - echo "after_submit" -} - -#----------------------------------------------------------------------------------------------------------------- -#fun : submit to cluster -#param : N/A -#return : 0 -- success; not 0 -- failure -#----------------------------------------------------------------------------------------------------------------- -function submit() { - g_run_stage="submit" - g_job_name="paddle_rec_mpi" - g_hdfs_path=$g_hdfs_path - g_job_entry="job.sh" - - engine_hdfs_output=${engine_hdfs_output}/$(date +%Y%m%d%H%M%S) - - cd ${engine_temp_path} - - ${engine_submit_hpc}/bin/qsub_f \ - -N ${g_job_name} \ - --conf ${engine_submit_qconf} \ - --hdfs ${engine_hdfs_name} \ - --ugi ${engine_hdfs_ugi} \ - --hout ${engine_hdfs_output} \ - --files ./package \ - -l nodes=${engine_submit_nodes},walltime=1000:00:00,resource=full ${g_job_entry} -} - -function main() { - package - - before_submit - submit - after_submit -} -- GitLab