diff --git a/core/trainers/cluster_trainer.py b/core/trainers/cluster_trainer.py index 8d71280c2998ae3ff2b89b0c4c5422e2b70dd61b..a23ba61c43545f7c42f9bcaac554f8df83e697a4 100755 --- a/core/trainers/cluster_trainer.py +++ b/core/trainers/cluster_trainer.py @@ -31,8 +31,7 @@ from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer class ClusterTrainer(TranspileTrainer): def processor_register(self): - #role = PaddleCloudRoleMaker() - role = MPISymetricRoleMaker() + role = PaddleCloudRoleMaker() fleet.init(role) if fleet.is_server(): diff --git a/example/mpi/worker.sh b/example/mpi/job.sh similarity index 100% rename from example/mpi/worker.sh rename to example/mpi/job.sh diff --git a/example/mpi/submit.sh b/example/mpi/submit.sh index 56b5f8798f0e4181dfd54d9e831078e4b1533d39..33eba8394aae179d09ca8ae85547d98fd5db38bd 100644 --- a/example/mpi/submit.sh +++ b/example/mpi/submit.sh @@ -65,7 +65,7 @@ function submit() { g_run_stage="submit" g_job_name="paddle_rec_mpi" g_hdfs_path=$g_hdfs_path - g_job_entry="worker.sh" + g_job_entry="job.sh" engine_hdfs_output=${engine_hdfs_output}/$(date +%Y%m%d%H%M%S)