From fff2a92b256a64cb09a02e4ae85520b8dca201ec Mon Sep 17 00:00:00 2001
From: lilong12
Date: Thu, 3 Dec 2020 11:11:39 +0800
Subject: [PATCH] add shell scripts to run training on single node and multiple nodes. (#424)

* add shells, test=develop
---
 tools/benchmark/run_multi_nodes.sh | 14 ++++++++++++++
 tools/benchmark/run_single_node.sh |  7 +++++++
 2 files changed, 21 insertions(+)
 create mode 100755 tools/benchmark/run_multi_nodes.sh
 create mode 100755 tools/benchmark/run_single_node.sh

diff --git a/tools/benchmark/run_multi_nodes.sh b/tools/benchmark/run_multi_nodes.sh
new file mode 100755
index 00000000..4a111999
--- /dev/null
+++ b/tools/benchmark/run_multi_nodes.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+# Comma-separated IP addresses of all nodes; modify to match your own environment.
+ALL_NODE_IPS="10.10.10.1,10.10.10.2"
+# IP address of the current node; modify to match your own environment.
+CUR_NODE_IPS="10.10.10.1"
+
+python -m paddle.distributed.launch \
+    --cluster_node_ips="$ALL_NODE_IPS" \
+    --node_ip="$CUR_NODE_IPS" \
+    --gpus="0,1,2,3" \
+    tools/train.py \
+        -c ./configs/ResNet/ResNet50.yaml \
+        -o print_interval=10
diff --git a/tools/benchmark/run_single_node.sh b/tools/benchmark/run_single_node.sh
new file mode 100755
index 00000000..5ec44455
--- /dev/null
+++ b/tools/benchmark/run_single_node.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+python -m paddle.distributed.launch \
+    --gpus="0,1,2,3" \
+    tools/train.py \
+        -c ./configs/ResNet/ResNet50.yaml \
+        -o print_interval=10
-- 
GitLab