提交 0411147c 编写于 作者: J jingqinghe

update document

上级 9746199f
...@@ -5,13 +5,14 @@ This document introduces how to submit an FL job to mpi cluster ...@@ -5,13 +5,14 @@ This document introduces how to submit an FL job to mpi cluster
### Dependency ### Dependency
- paddlepaddle>=1.8 - paddlepaddle>=1.8
- paddle_fl==0.2.0
### How to install PaddleFL ### How to install PaddleFL
Please use the pip that belongs to the Python environment in which paddlepaddle is installed Please use the pip that belongs to the Python environment in which paddlepaddle is installed
```sh ```sh
pip install paddle_fl pip install paddle_fl==0.2.0
``` ```
### How it works ### How it works
...@@ -27,7 +28,7 @@ The information of the cluster is defined in config.txt and will be transmitted ...@@ -27,7 +28,7 @@ The information of the cluster is defined in config.txt and will be transmitted
train_program.py is the program that is executed in the cluster. train_program.py is the program that is executed in the cluster.
```sh ```sh
#use the python prepared above to submit job #use the python prepared above to generate fl job and submit the job to mpi cluster
python/bin/python client.py config.txt python/bin/python client.py config.txt
``` ```
......
...@@ -18,16 +18,16 @@ import random ...@@ -18,16 +18,16 @@ import random
import zmq import zmq
import time import time
import sys import sys
from paddle_fl.paddle_fl.core.submitter.client_base import HPCClient from paddle_fl.core.submitter.client_base import HPCClient
from paddle_fl.paddle_fl.core.scheduler.agent_master import FLScheduler from paddle_fl.core.scheduler.agent_master import FLScheduler
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle_fl.paddle_fl.core.master.job_generator import JobGenerator from paddle_fl.core.master.job_generator import JobGenerator
from paddle_fl.paddle_fl.core.strategy.fl_strategy_base import FLStrategyFactory from paddle_fl.core.strategy.fl_strategy_base import FLStrategyFactory
from model import Model from model import Model
import tarfile import tarfile
#random_port = random.randint(60001, 64001) #random_port = random.randint(60001, 64001)
random_port = 60001 random_port = 64001
print(random_port) print(random_port)
current_ip = socket.gethostbyname(socket.gethostname()) current_ip = socket.gethostbyname(socket.gethostname())
endpoints = "{}:{}".format(current_ip, random_port) endpoints = "{}:{}".format(current_ip, random_port)
...@@ -51,8 +51,8 @@ default_dict = { ...@@ -51,8 +51,8 @@ default_dict = {
"ugi": "", "ugi": "",
"worker_nodes": 5, "worker_nodes": 5,
"server_nodes": 1, "server_nodes": 1,
"hadoop_home": "", "hadoop_home": "/path/to/hadoop",
"hpc_home": "", "hpc_home": "/path/to/hpc",
"package_path": "./package", "package_path": "./package",
"priority": "high", "priority": "high",
"queue": "", "queue": "",
......
...@@ -17,17 +17,17 @@ import random ...@@ -17,17 +17,17 @@ import random
import zmq import zmq
import os import os
import tarfile import tarfile
import paddle_fl.paddle_fl as fl import paddle_fl as fl
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle_fl.paddle_fl.core.server.fl_server import FLServer from paddle_fl.core.server.fl_server import FLServer
from paddle_fl.paddle_fl.core.master.fl_job import FLRunTimeJob from paddle_fl.core.master.fl_job import FLRunTimeJob
from paddle_fl.paddle_fl.core.trainer.fl_trainer import FLTrainerFactory from paddle_fl.core.trainer.fl_trainer import FLTrainerFactory
import numpy as np import numpy as np
import sys import sys
import logging import logging
import time import time
random_port = 60001 random_port = 64001
scheduler_conf = {} scheduler_conf = {}
#connect to scheduler and get the role and id of the endpoint #connect to scheduler and get the role and id of the endpoint
...@@ -99,8 +99,7 @@ else: ...@@ -99,8 +99,7 @@ else:
job._scheduler_ep = scheduler_conf["ENDPOINT"] job._scheduler_ep = scheduler_conf["ENDPOINT"]
trainer = FLTrainerFactory().create_fl_trainer(job) trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = endpoint trainer._current_ep = endpoint
place = fluid.CPUPlace() trainer.start()
trainer.start(place)
print(trainer._scheduler_ep, trainer._current_ep) print(trainer._scheduler_ep, trainer._current_ep)
output_folder = "fl_model" output_folder = "fl_model"
epoch_id = 0 epoch_id = 0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册