提交 0411147c 编写于 作者: J jingqinghe

update document

上级 9746199f
......@@ -5,13 +5,14 @@ This document introduces how to submit an FL job to mpi cluster
### Dependency
- paddlepaddle>=1.8
- paddle_fl==0.2.0
### How to install PaddleFL
Please use the pip that belongs to the Python environment in which paddlepaddle is installed
```sh
pip install paddle_fl
pip install paddle_fl==0.2.0
```
### How it works
......@@ -27,7 +28,7 @@ The information of the cluster is defined in config.txt and will be transmitted
train_program.py is the program that is executed in the cluster.
```sh
#use the python prepared above to submit job
# use the python prepared above to generate the FL job and submit it to the MPI cluster
python/bin/python client.py config.txt
```
......
......@@ -18,16 +18,16 @@ import random
import zmq
import time
import sys
from paddle_fl.paddle_fl.core.submitter.client_base import HPCClient
from paddle_fl.paddle_fl.core.scheduler.agent_master import FLScheduler
from paddle_fl.core.submitter.client_base import HPCClient
from paddle_fl.core.scheduler.agent_master import FLScheduler
import paddle.fluid as fluid
from paddle_fl.paddle_fl.core.master.job_generator import JobGenerator
from paddle_fl.paddle_fl.core.strategy.fl_strategy_base import FLStrategyFactory
from paddle_fl.core.master.job_generator import JobGenerator
from paddle_fl.core.strategy.fl_strategy_base import FLStrategyFactory
from model import Model
import tarfile
#random_port = random.randint(60001, 64001)
random_port = 60001
random_port = 64001
print(random_port)
current_ip = socket.gethostbyname(socket.gethostname())
endpoints = "{}:{}".format(current_ip, random_port)
......@@ -51,8 +51,8 @@ default_dict = {
"ugi": "",
"worker_nodes": 5,
"server_nodes": 1,
"hadoop_home": "",
"hpc_home": "",
"hadoop_home": "/path/to/hadoop",
"hpc_home": "/path/to/hpc",
"package_path": "./package",
"priority": "high",
"queue": "",
......
......@@ -17,17 +17,17 @@ import random
import zmq
import os
import tarfile
import paddle_fl.paddle_fl as fl
import paddle_fl as fl
import paddle.fluid as fluid
from paddle_fl.paddle_fl.core.server.fl_server import FLServer
from paddle_fl.paddle_fl.core.master.fl_job import FLRunTimeJob
from paddle_fl.paddle_fl.core.trainer.fl_trainer import FLTrainerFactory
from paddle_fl.core.server.fl_server import FLServer
from paddle_fl.core.master.fl_job import FLRunTimeJob
from paddle_fl.core.trainer.fl_trainer import FLTrainerFactory
import numpy as np
import sys
import logging
import time
random_port = 60001
random_port = 64001
scheduler_conf = {}
#connect to scheduler and get the role and id of the endpoint
......@@ -99,8 +99,7 @@ else:
job._scheduler_ep = scheduler_conf["ENDPOINT"]
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = endpoint
place = fluid.CPUPlace()
trainer.start(place)
trainer.start()
print(trainer._scheduler_ep, trainer._current_ep)
output_folder = "fl_model"
epoch_id = 0
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册