Commit 4cdab80b authored by tangwei

remove cluster example

Parent 077e682f
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
engine:
  workspace: "/home/tangwei/fleet_rec_env/paddlerec/example/cloud"
  backend: "PaddleCloud"

  submit:
    sk: "2907445932295c57bffab28676fc5a66"
    ak: "55cb2027d7a05b9f8b213de483698cc8"
    version: "paddle-fluid-v1.7.1"
    priority: "high"
    jobname: "paddlerec_ctr_dnn"
    group: "paddle_benchmark"
    cluster: "SZWG-SUNWAY"

    config: "{workspace}/config.ini"
    nodes: 8

    submit_scrpit: "{workspace}/submit.sh"
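
# Note: "{workspace}" placeholders are expanded by PaddleRec when this file is
# loaded, e.g. config: "{workspace}/config.ini" resolves to
# /home/tangwei/fleet_rec_env/paddlerec/example/cloud/config.ini. The keys under
# "engine:" surface in the submit scripts as environment variables prefixed with
# "engine_" (e.g. ${engine_workspace}, ${engine_temp_path} in submit.sh below);
# this mapping is inferred from the scripts in this commit, not from separate
# documentation.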
echo "Run before_hook.sh ..."
../python27-gcc482/bin/python ../python27-gcc482/bin/pip install ./thirdparty/paddle_rec-0.0.2-py2-none-any.whl --index-url=http://pip.baidu.com/pypi/simple --trusted-host pip.baidu.com
echo "End before_hook.sh ..."
# type of storage cluster
storage_type="hdfs"

# attention: files for training should be put on hdfs
force_reuse_output_path="True"

fs_name="afs://yinglong.afs.baidu.com:9902"
fs_ugi="paddle,paddle"

thirdparty_path="/user/paddle/benchmark/ctr/thirdparty"

# train data path on hdfs
train_data_path="/user/paddle/benchmark/ctr/train_data_paddle/part_1*"

# test data path on hdfs
#test_data_path="/user/paddle/benchmark/ctr/test_data"

# the output directory on hdfs
output_path="/user/paddle/ly"
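
# A hedged sanity check for the paths above, assuming a Hadoop-compatible
# client for this AFS cluster is on PATH (the -D property names follow the
# classic Hadoop generic-option syntax; adjust to your client):
#
#   hadoop fs -D fs.default.name=${fs_name} -D hadoop.job.ugi=${fs_ugi} \
#       -ls ${train_data_path}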
train:
  trainer:
    # for cluster training
    strategy: "async"

  epochs: 10
  workspace: "paddlerec.models.rank.dnn"

  reader:
    batch_size: 512
    class: "{workspace}/../criteo_reader.py"
    train_data_path: "train_data"
    reader_debug_mode: False

  model:
    models: "{workspace}/model.py"
    hyper_parameters:
      sparse_inputs_slots: 27
      sparse_feature_number: 1000001
      sparse_feature_dim: 10
      dense_input_dim: 13
      fc_sizes: [400, 400, 400]
      learning_rate: 0.0001
      optimizer: adam

  save:
    increment:
      dirname: "increment"
      epoch_interval: 2
      save_last: True
    inference:
      dirname: "inference"
      epoch_interval: 4
      save_last: True
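
# Reading the hyper_parameters: Criteo has 13 integer (dense) features and 26
# categorical (sparse) features, so dense_input_dim is 13 and
# sparse_inputs_slots is 27 -- presumably the label slot plus the 26
# categorical slots (an inference from the reader layout, not stated here).
# The shared embedding table is sparse_feature_number x sparse_feature_dim =
# 1000001 x 10 floats, roughly 40 MB in FP32.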
#!/bin/bash
###################################################
# Usage: job.sh
# Description: run the MPI job client implementation
###################################################
# ---------------------------------------------------------------------------- #
# variable define #
# ---------------------------------------------------------------------------- #
export CPU_NUM=16
export GLOG_v=0
export FLAGS_rpc_deadline=300000
# ---------------------------------------------------------------------------- #
python -m paddlerec.run -m paddle_rec_config.yaml -e cluster -r worker
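
# Flag reading (inferred from usage in this commit): -m points paddlerec.run
# at the run config, -e selects the engine ("cluster"), and -r sets this
# node's role ("worker"); `python -m paddlerec.run --help` should list the
# authoritative options.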
#!/bin/bash
###################################################
# Usage: submit.sh
# Description: run the MPI submit client implementation
###################################################
g_package_files=""
#-----------------------------------------------------------------------------------------------------------------
#fun : before hook submit to cluster
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function before_submit_hook() {
    echo "before_submit"
}

#-----------------------------------------------------------------------------------------------------------------
#fun : after hook submit to cluster
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function after_submit_hook() {
    echo "after_submit"
}

#-----------------------------------------------------------------------------------------------------------------
#fun : package to cluster
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function package() {
    echo "package"
    temp=${engine_temp_path}

    cp ${engine_workspace}/job.sh ${temp}
    cp ${engine_workspace}/before_hook.sh ${temp}
    cp ${engine_run_config} ${temp}/paddle_rec_config.yaml

    g_submitfiles="job.sh before_hook.sh paddle_rec_config.yaml"
    g_run_cmd="sh job.sh"
}
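
# These hooks appear to be sourced by the PaddleCloud backend, which is
# expected to provide ${engine_workspace}, ${engine_run_config} and
# ${engine_temp_path}, then read back g_submitfiles and g_run_cmd (inferred
# from this script, not documented here). A hedged local smoke test of
# package():
#
#   engine_workspace=$PWD engine_run_config=./config.yaml \
#   engine_temp_path=$(mktemp -d) bash -c 'source submit.sh && package'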
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
engine:
  workspace: "paddlerec.models.rank.dnn"
  backend: "MPI"

  hdfs:
    name: "hdfs://nmg01-taihang-hdfs.dmop.baidu.com:54310"
    ugi: "fcr,SaK2VqfEDeXzKPor"
    output: "/app/ecom/fcr/fanyabo/wadstyleimageq/tangwei12/output_1/"

  package:
    build_script: "{workspace}/package.sh"
    python: "/home/tangwei/fleet_rec_env/cpython-2.7.11-ucs4"
    paddlerec: "/home/tangwei/fleet_rec_env/PaddleRec"

  submit:
    hpc: "/home/tangwei/Plines/client/smart_client_khan/"
    qconf: "/home/tangwei/Plines/imageq/qsub_f.conf"
    nodes: 10

    submit_scrpit: "{workspace}/submit.sh"
    job_scrpit: "{workspace}/worker.sh"
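
# The package section lists what gets shipped to the cluster: a relocatable
# CPython under "python" and the PaddleRec wheel, which the job script unpacks
# and installs on each node. submit.hpc points at the HPC client whose
# ${engine_submit_hpc}/bin/qsub_f binary submit.sh invokes below.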
#!/bin/bash
###################################################
# Usage: job.sh
# Description: run the job on each MPI node
###################################################
# ---------------------------------------------------------------------------- #
# variable define #
# ---------------------------------------------------------------------------- #
declare g_curPath=""
declare g_scriptName=""
declare g_workPath=""
declare g_run_stage=""
# ---------------------------------------------------------------------------- #
# const define #
# ---------------------------------------------------------------------------- #
export FLAGS_communicator_send_queue_size=18
export FLAGS_communicator_thread_pool_size=20
export FLAGS_communicator_max_merge_var_num=18
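# The FLAGS_communicator_* variables tune Paddle's distributed communicator
# (send queue depth, thread pool size, and how many variables are merged per
# send); the values here look like benchmark-specific tuning rather than
# defaults.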
################################################################################
#-----------------------------------------------------------------------------------------------------------------
#fun : check function return code
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function check_error() {
    if [ ${?} -ne 0 ]; then
        echo "execute ${g_run_stage} raised an exception! please check ..."
        exit 1
    fi
}
#-----------------------------------------------------------------------------------------------------------------
#fun : prepare the python environment and install paddlerec on each node
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function env_prepare() {
    g_run_stage="env_prepare"
    WORKDIR=$(pwd)
    mpirun -npernode 1 mv package/* ./
    echo "current:"$WORKDIR

    mpirun -npernode 1 tar -zxvf python.tar.gz > /dev/null

    export PYTHONPATH=$WORKDIR/python/
    export PYTHONROOT=$WORKDIR/python/
    export LIBRARY_PATH=$PYTHONPATH/lib:$LIBRARY_PATH
    export LD_LIBRARY_PATH=$PYTHONPATH/lib:$LD_LIBRARY_PATH
    export PATH=$PYTHONPATH/bin:$PATH
    export LIBRARY_PATH=$PYTHONROOT/lib:$LIBRARY_PATH

    mpirun -npernode 1 python/bin/python -m pip uninstall -y paddle-rec
    mpirun -npernode 1 python/bin/python -m pip install whl/fleet_rec-0.0.2-py2-none-any.whl --index-url=http://pip.baidu.com/pypi/simple --trusted-host pip.baidu.com
    check_error
}
function run() {
    echo "run"
    g_run_stage="run"
    mpirun -npernode 2 -timestamp-output -tag-output -machinefile ${PBS_NODEFILE} python/bin/python -u -m paddlerec.run -m paddlerec.models.rank.dnn --engine cluster --role worker
}

function main() {
    env_prepare
    run
}
main
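
# Flow of this script: env_prepare runs once per node (-npernode 1) to unpack
# the shipped Python and install the fleet_rec wheel, then run launches two
# trainer processes per node (-npernode 2) across the hosts in
# ${PBS_NODEFILE}, each executing paddlerec.run with --engine cluster
# --role worker.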
#!/bin/bash
###################################################
# Usage: submit.sh
# Description: run the MPI submit client implementation
###################################################
#-----------------------------------------------------------------------------------------------------------------
#fun : get argument from env, set it into variables
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function vars_get_from_env() {
    echo "xx"
}
#-----------------------------------------------------------------------------------------------------------------
#fun : package
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function package() {
    g_run_stage="package"
    temp=${engine_temp_path}
    echo "package temp dir: " ${temp}

    cp ${engine_job_scrpit} ${temp}
    cp ${engine_submit_qconf} ${temp}
    echo "copy job.sh from " ${engine_worker} " to " ${temp}

    mkdir -p ${temp}/package
    cp -r ${engine_package_python} ${temp}/package/
    echo "copy python from " ${engine_package_python} " to " ${temp}

    mkdir ${temp}/package/whl
    cp ${engine_package_paddlerec} ${temp}/package/whl/
    echo "copy " ${engine_package_paddlerec} " to " ${temp}"/whl/"
}
#-----------------------------------------------------------------------------------------------------------------
#fun : before hook submit to cluster
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function before_submit() {
    echo "before_submit"
}
#-----------------------------------------------------------------------------------------------------------------
#fun : after hook submit to cluster
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function after_submit() {
    echo "after_submit"
}
#-----------------------------------------------------------------------------------------------------------------
#fun : submit to cluster
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function submit() {
    g_run_stage="submit"

    g_job_name="paddle_rec_mpi"
    g_job_entry="job.sh"
    engine_hdfs_output=${engine_hdfs_output}/$(date +%Y%m%d%H%M%S)

    cd ${engine_temp_path}

    ${engine_submit_hpc}/bin/qsub_f \
        -N ${g_job_name} \
        --conf ${engine_submit_qconf} \
        --hdfs ${engine_hdfs_name} \
        --ugi ${engine_hdfs_ugi} \
        --hout ${engine_hdfs_output} \
        --files ./package \
        -l nodes=${engine_submit_nodes},walltime=1000:00:00,resource=full ${g_job_entry}
}
function main() {
    package
    before_submit
    submit
    after_submit
}
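
# Submission flow: package() stages job.sh, the qsub_f config, the relocatable
# Python and the wheel into ${engine_temp_path}; submit() then calls qsub_f
# with a timestamped HDFS output directory and ships ./package via --files.
# main is presumably invoked by the engine after this script is sourced; it is
# never called here (an inference, not documented in this commit).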