Commit a6caa651
Authored on Feb 27, 2020 by qjing666

fix code style

Parent: d4e75537

Showing 29 changed files with 319 additions and 205 deletions (+319 / -205)
contrib/data_safety_training/image_classification/server/server.py  +32 -30
contrib/data_safety_training/image_classification/server/user.py  +55 -39
contrib/data_safety_training/image_classification/submitter.py  +1 -1
paddle_fl/examples/dpsgd_demo/fl_master.py  +14 -7
paddle_fl/examples/dpsgd_demo/fl_scheduler.py  +1 -1
paddle_fl/examples/dpsgd_demo/fl_server.py  +2 -2
paddle_fl/examples/dpsgd_demo/fl_trainer.py  +21 -13
paddle_fl/examples/femnist_demo/fl_master.py  +27 -10
paddle_fl/examples/femnist_demo/fl_scheduler.py  +1 -1
paddle_fl/examples/femnist_demo/fl_server.py  +2 -2
paddle_fl/examples/femnist_demo/fl_trainer.py  +41 -25
paddle_fl/examples/gru4rec_demo/fl_master.py  +12 -9
paddle_fl/examples/gru4rec_demo/fl_scheduler.py  +1 -1
paddle_fl/examples/gru4rec_demo/fl_server.py  +2 -2
paddle_fl/examples/gru4rec_demo/fl_trainer.py  +11 -7
paddle_fl/examples/k8s_deployment/master/fl_master.py  +13 -6
paddle_fl/examples/k8s_deployment/scheduler/fl_scheduler.py  +3 -1
paddle_fl/examples/k8s_deployment/server/fl_server.py  +5 -3
paddle_fl/examples/k8s_deployment/trainer0/fl_trainer.py  +12 -5
paddle_fl/examples/k8s_deployment/trainer1/fl_trainer.py  +12 -5
paddle_fl/examples/secagg_demo/fl_master.py  +12 -5
paddle_fl/examples/secagg_demo/fl_scheduler.py  +1 -1
paddle_fl/examples/secagg_demo/fl_server.py  +2 -2
paddle_fl/examples/secagg_demo/keys/0_pub_key.txt  +1 -1
paddle_fl/examples/secagg_demo/keys/1_pub_key.txt  +1 -1
paddle_fl/examples/submitter_demo/conf.txt  +0 -1
paddle_fl/examples/submitter_demo/model.py  +3 -2
paddle_fl/examples/submitter_demo/scheduler_client.py  +27 -17
paddle_fl/examples/submitter_demo/train_program.py  +4 -5
contrib/data_safety_training/image_classification/server/server.py

@@ -12,14 +12,14 @@ import math
import msgpack


def data_generater(samples, r):
    # data generater
    def train_data():
        for item in samples:
            sample = msgpack.loads(r.get(str(item)))
            conv = sample[0]
            label = sample[1]
            yield conv, label

    return train_data

@@ -67,7 +67,7 @@ class ResNet():
                size=class_dim,
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Uniform(-stdv, stdv)),
                act="softmax")
        else:
            for block in range(len(depth)):
                for i in range(depth[block]):

@@ -87,7 +87,7 @@ class ResNet():
                size=class_dim,
                param_attr=fluid.param_attr.ParamAttr(
                    initializer=fluid.initializer.Uniform(-stdv, stdv)),
                act="softmax")
        return out

    def conv_bn_layer(self,

@@ -123,8 +123,6 @@ class ResNet():
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance', )

    def shortcut(self, input, ch_out, stride, is_first, name):
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1 or is_first == True:

@@ -181,31 +179,33 @@ class ResNet():
            input, num_filters, stride, is_first, name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')


# local redis config
redis_host = "127.0.0.1"
redis_port = 6379
redis_password = ""
r = redis.StrictRedis(
    host=redis_host, port=redis_port, password=redis_password)

# reader generation
reader = fluid.layers.py_reader(
    capacity=64,
    shapes=[(-1, 64, 8, 8), (-1, 1)],
    dtypes=['float32', 'int64'])
samples = r.keys()
train_data = data_generater(samples, r)
reader.decorate_paddle_reader(
    paddle.batch(
        paddle.reader.shuffle(train_data, buf_size=5000), batch_size=64))
conv1, label = fluid.layers.read_file(reader)

# train program
place = fluid.CUDAPlace(0)
model = ResNet(layers=50)
predicts = model.net(conv1, 10)
cost = fluid.layers.cross_entropy(input=predicts, label=label)
accuracy = fluid.layers.accuracy(input=predicts, label=label)
loss = fluid.layers.mean(cost)

@@ -222,18 +222,20 @@ step = 0
train_start = time.time()
# start training
for pass_id in range(EPOCH_NUM):
    reader.start()
    try:
        while True:
            start_time = time.time()
            loss_value, acc_value = exe.run(
                fetch_list=[loss.name, accuracy.name])
            step += 1
            if step % 10 == 0:
                print("epoch: " + str(pass_id) + "step: " + str(step) +
                      "loss: " + str(loss_value) + "acc: " + str(acc_value))
            end_time = time.time()
            total_time += (end_time - start_time)
    except fluid.core.EOFException:
        reader.reset()

train_end = time.time()
print("total time: %d" % (train_end - train_start))
print("computation time: %d" % total_time)
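The reader built above streams pre-computed conv features out of Redis, where user.py (next file) stores them with msgpack. A minimal sketch of that round trip, assuming a local Redis instance on the default port and an illustrative sample:

import msgpack
import redis

r = redis.StrictRedis(host="127.0.0.1", port=6379)

# producer side (user.py): serialize one [feature, label] pair under key "1"
sample = [[0.1, 0.2, 0.3], 7]  # hypothetical feature list and label
r.set("1", msgpack.dumps(sample))

# consumer side (server.py): fetch and unpack the same sample again
conv, label = msgpack.loads(r.get("1"))
print(conv, label)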
contrib/data_safety_training/image_classification/server/user.py

@@ -5,10 +5,12 @@ import paddle.fluid as fluid
import numpy
import sys
import redis
import time
from paddle.fluid import layers
from paddle.fluid.param_attr import ParamAttr
import msgpack


def conv_bn_layer(input,
                  num_filters,
                  filter_size,

@@ -16,30 +18,30 @@ def conv_bn_layer(input,
                  groups=1,
                  act=None,
                  name=None):
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=groups,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + '.conv2d.output.1')
    if name == "conv1":
        bn_name = "bn_" + name
    else:
        bn_name = "bn" + name[3:]
    return fluid.layers.batch_norm(
        input=conv,
        act=act,
        name=bn_name + '.output.1',
        param_attr=ParamAttr(name=bn_name + '_scale'),
        bias_attr=ParamAttr(bn_name + '_offset'),
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance', )


def load_conf(conf_file, local_dict):

@@ -51,6 +53,7 @@ def load_conf(conf_file, local_dict):
            local_dict[group[0]] = group[1]
    return local_dict


# redis DB configuration
redis_host = "127.0.0.1"
redis_port = 6379

@@ -58,26 +61,39 @@ redis_password = ""
start_time = time.time()
# start a redis client and empty the DB
r = redis.StrictRedis(
    host=redis_host, port=redis_port, password=redis_password)
r.flushall()

# encoding program
images = fluid.layers.data(name='images', shape=[3, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
place = fluid.CPUPlace()
conv1 = conv_bn_layer(
    input=images,
    num_filters=64,
    filter_size=7,
    stride=2,
    act='relu',
    name="conv1")
pool = fluid.layers.pool2d(
    input=conv1, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
pretrained_model = 'ResNet50_pretrained'
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())


# load pretrained mode and prepare datal
def if_exist(var):
    return os.path.exists(os.path.join(pretrained_model, var.name))


fluid.io.load_vars(
    exe,
    pretrained_model,
    main_program=fluid.default_main_program(),
    predicate=if_exist)

train_data = paddle.dataset.cifar.train10()
step = 0

@@ -86,11 +102,13 @@ step = 0
for data in train_data():
    pre_data = []
    pre_data.append(data)
    res = exe.run(program=fluid.default_main_program(),
                  feed=feeder.feed(pre_data),
                  fetch_list=[pool.name])
    sample = [res[0][0].tolist(), data[1]]
    step += 1
    file = msgpack.dumps(sample)
    r.set(step, file)
    if step % 100 == 0:
        print(numpy.array(sample[0]).shape)
        print("%dstart" % step)

@@ -99,6 +117,4 @@ files = r.keys()
print("upload file numbers: %d" % len(files))
end_time = time.time()
total_time = end_time - start_time
print("total time: %d" % total_time)
contrib/data_safety_training/image_classification/submitter.py

@@ -2,7 +2,7 @@ import zmq
import socket
import msgpack
import os

mission_dict = {"mission": "image classification", "image_size": [3, 32, 32]}

#send request
context = zmq.Context()
zmq_socket = context.socket(zmq.REQ)
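submitter.py opens a ZeroMQ REQ socket to announce the mission dictionary to the service side. A minimal sketch of that request, with the endpoint and the serialized payload as illustrative assumptions (the hunk shown above only creates the socket):

import msgpack
import zmq

mission_dict = {"mission": "image classification", "image_size": [3, 32, 32]}

context = zmq.Context()
zmq_socket = context.socket(zmq.REQ)
zmq_socket.connect("tcp://127.0.0.1:60001")   # hypothetical endpoint
zmq_socket.send(msgpack.dumps(mission_dict))  # REQ: one send ...
reply = zmq_socket.recv()                     # ... then exactly one recv
print(reply)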
paddle_fl/examples/dpsgd_demo/fl_master.py

@@ -4,16 +4,22 @@ from paddle_fl.core.master.job_generator import JobGenerator
from paddle_fl.core.strategy.fl_strategy_base import FLStrategyFactory
import math


class Model(object):
    def __init__(self):
        pass

    def lr_network(self):
        self.inputs = fluid.layers.data(
            name='img', shape=[1, 28, 28], dtype="float32")
        self.label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        self.predict = fluid.layers.fc(
            input=self.inputs, size=10, act='softmax')
        self.sum_cost = fluid.layers.cross_entropy(
            input=self.predict, label=self.label)
        self.accuracy = fluid.layers.accuracy(
            input=self.predict, label=self.label)
        self.loss = fluid.layers.mean(self.sum_cost)
        self.startup_program = fluid.default_startup_program()

@@ -23,7 +29,7 @@ model.lr_network()
STEP_EPSILON = 0.1
DELTA = 0.00001
SIGMA = math.sqrt(2.0 * math.log(1.25 / DELTA)) / STEP_EPSILON
CLIP = 4.0
batch_size = 64

@@ -33,7 +39,8 @@ job_generator.set_optimizer(optimizer)
job_generator.set_losses([model.loss])
job_generator.set_startup_program(model.startup_program)
job_generator.set_infer_feed_and_target_names(
    [model.inputs.name, model.label.name],
    [model.loss.name, model.accuracy.name])

build_strategy = FLStrategyFactory()
build_strategy.dpsgd = True
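The SIGMA line above is the standard Gaussian-mechanism calibration sigma = sqrt(2 ln(1.25 / delta)) / epsilon. With the constants in this file it evaluates to roughly 48.5, the per-step noise multiplier paired with the clipping bound CLIP = 4.0; a quick check:

import math

STEP_EPSILON = 0.1
DELTA = 0.00001
SIGMA = math.sqrt(2.0 * math.log(1.25 / DELTA)) / STEP_EPSILON
print(SIGMA)  # ~48.45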
paddle_fl/examples/dpsgd_demo/fl_scheduler.py

@@ -3,7 +3,7 @@ from paddle_fl.core.scheduler.agent_master import FLScheduler
worker_num = 4
server_num = 1
#Define number of worker/server and the port for scheduler
scheduler = FLScheduler(worker_num, server_num, port=9091)
scheduler.set_sample_worker_num(4)
scheduler.init_env()
print("init env done.")
paddle_fl/examples/dpsgd_demo/fl_server.py

@@ -21,7 +21,7 @@ server_id = 0
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_server_job(job_path, server_id)
job._scheduler_ep = "127.0.0.1:9091"  # IP address for scheduler
server.set_server_job(job)
server._current_ep = "127.0.0.1:8181"  # IP address for server
server.start()
paddle_fl/examples/dpsgd_demo/fl_trainer.py

@@ -7,44 +7,51 @@ import paddle.fluid as fluid
import logging
import math

logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%M-%Y %H:%M:%S",
    level=logging.DEBUG)

trainer_id = int(sys.argv[1])  # trainer id for each guest
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # Inform scheduler IP address to trainer
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
trainer.start()

test_program = trainer._main_program.clone(for_test=True)

train_reader = paddle.batch(
    paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
    batch_size=64)
test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)

img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())


def train_test(train_test_program, train_test_feed, train_test_reader):
    acc_set = []
    for test_data in train_test_reader():
        acc_np = trainer.exe.run(program=train_test_program,
                                 feed=train_test_feed.feed(test_data),
                                 fetch_list=["accuracy_0.tmp_0"])
        acc_set.append(float(acc_np[0]))
    acc_val_mean = numpy.array(acc_set).mean()
    return acc_val_mean


def compute_privacy_budget(sample_ratio, epsilon, step, delta):
    E = 2 * epsilon * math.sqrt(step * sample_ratio)
    print("({0}, {1})-DP".format(E, delta))


output_folder = "model_node%d" % trainer_id
epoch_id = 0
step = 0

@@ -64,7 +71,8 @@ while not trainer.stop():
        train_test_feed=feeder)
    print("Test with epoch %d, accuracy: %s" % (epoch_id, acc_val))
    compute_privacy_budget(
        sample_ratio=0.001, epsilon=0.1, step=step, delta=0.00001)

    save_dir = (output_folder + "/epoch_%d") % epoch_id
    trainer.save_inference_program(output_folder)
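compute_privacy_budget above reports a budget that grows with the square root of the step count: E = 2 * epsilon * sqrt(step * sample_ratio). A worked check with the values the training loop passes in (the step count here is chosen for illustration):

import math

sample_ratio = 0.001  # sampling ratio q
epsilon = 0.1         # per-step epsilon
delta = 0.00001

step = 1000           # illustrative step count
E = 2 * epsilon * math.sqrt(step * sample_ratio)
print("({0}, {1})-DP".format(E, delta))  # (0.2, 1e-05)-DP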
paddle_fl/examples/femnist_demo/fl_master.py

@@ -9,14 +9,31 @@ class Model(object):
        pass

    def cnn(self):
        self.inputs = fluid.layers.data(
            name='img', shape=[1, 28, 28], dtype="float32")
        self.label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        self.conv_pool_1 = fluid.nets.simple_img_conv_pool(
            input=self.inputs,
            num_filters=20,
            filter_size=5,
            pool_size=2,
            pool_stride=2,
            act='relu')
        self.conv_pool_2 = fluid.nets.simple_img_conv_pool(
            input=self.conv_pool_1,
            num_filters=50,
            filter_size=5,
            pool_size=2,
            pool_stride=2,
            act='relu')
        self.predict = self.predict = fluid.layers.fc(
            input=self.conv_pool_2, size=62, act='softmax')
        self.cost = fluid.layers.cross_entropy(
            input=self.predict, label=self.label)
        self.accuracy = fluid.layers.accuracy(
            input=self.predict, label=self.label)
        self.loss = fluid.layers.mean(self.cost)
        self.startup_program = fluid.default_startup_program()

@@ -30,8 +47,8 @@ job_generator.set_optimizer(optimizer)
job_generator.set_losses([model.loss])
job_generator.set_startup_program(model.startup_program)
job_generator.set_infer_feed_and_target_names(
    [model.inputs.name, model.label.name],
    [model.loss.name, model.accuracy.name])

build_strategy = FLStrategyFactory()
build_strategy.fed_avg = True
paddle_fl/examples/femnist_demo/fl_scheduler.py

@@ -3,7 +3,7 @@ from paddle_fl.core.scheduler.agent_master import FLScheduler
worker_num = 4
server_num = 1
# Define the number of worker/server and the port for scheduler
scheduler = FLScheduler(worker_num, server_num, port=9091)
scheduler.set_sample_worker_num(4)
scheduler.init_env()
print("init env done.")
paddle_fl/examples/femnist_demo/fl_server.py

@@ -7,7 +7,7 @@ server_id = 0
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_server_job(job_path, server_id)
job._scheduler_ep = "127.0.0.1:9091"  # IP address for scheduler
server.set_server_job(job)
server._current_ep = "127.0.0.1:8181"  # IP address for server
server.start()
paddle_fl/examples/femnist_demo/fl_trainer.py

@@ -8,16 +8,21 @@ import paddle.fluid as fluid
import logging
import math

logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%M-%Y %H:%M:%S",
    level=logging.DEBUG)

trainer_id = int(sys.argv[1])  # trainer id for each guest
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # Inform the scheduler IP to trainer
print(job._target_names)
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
trainer.start()
print(trainer._step)
test_program = trainer._main_program.clone(for_test=True)

@@ -26,26 +31,26 @@ img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())


def train_test(train_test_program, train_test_feed, train_test_reader):
    acc_set = []
    for test_data in train_test_reader():
        acc_np = trainer.exe.run(program=train_test_program,
                                 feed=train_test_feed.feed(test_data),
                                 fetch_list=["accuracy_0.tmp_0"])
        acc_set.append(float(acc_np[0]))
    acc_val_mean = numpy.array(acc_set).mean()
    return acc_val_mean


epoch_id = 0
step = 0
epoch = 3000
count_by_step = False
if count_by_step:
    output_folder = "model_node%d" % trainer_id
else:
    output_folder = "model_node%d_epoch" % trainer_id

while not trainer.stop():
    count = 0

@@ -55,24 +60,35 @@ while not trainer.stop():
    print("epoch %d start train" % (epoch_id))
    #train_data,test_data= data_generater(trainer_id,inner_step=trainer._step,batch_size=64,count_by_step=count_by_step)
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle_fl.dataset.femnist.train(
                trainer_id,
                inner_step=trainer._step,
                batch_size=64,
                count_by_step=count_by_step),
            buf_size=500),
        batch_size=64)

    test_reader = paddle.batch(
        paddle_fl.dataset.femnist.test(
            trainer_id,
            inner_step=trainer._step,
            batch_size=64,
            count_by_step=count_by_step),
        batch_size=64)

    if count_by_step:
        for step_id, data in enumerate(train_reader()):
            acc = trainer.run(feeder.feed(data), fetch=["accuracy_0.tmp_0"])
            step += 1
            count += 1
            print(count)
            if count % trainer._step == 0:
                break
        # print("acc:%.3f" % (acc[0]))
    else:
        trainer.run_with_epoch(
            train_reader, feeder, fetch=["accuracy_0.tmp_0"], num_epoch=1)

    acc_val = train_test(
        train_test_program=test_program,

@@ -80,6 +96,6 @@ while not trainer.stop():
        train_test_feed=feeder)
    print("Test with epoch %d, accuracy: %s" % (epoch_id, acc_val))
    if trainer_id == 0:
        save_dir = (output_folder + "/epoch_%d") % epoch_id
        trainer.save_inference_program(output_folder)
paddle_fl/examples/gru4rec_demo/fl_master.py

@@ -3,6 +3,7 @@ import paddle_fl as fl
from paddle_fl.core.master.job_generator import JobGenerator
from paddle_fl.core.strategy.fl_strategy_base import FLStrategyFactory


class Model(object):
    def __init__(self):
        pass

@@ -34,7 +35,8 @@ class Model(object):
            size=hid_size * 3,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=init_low_bound, high=init_high_bound),
                learning_rate=gru_lr_x))
        gru_h0 = fluid.layers.dynamic_gru(
            input=fc0,

@@ -45,12 +47,13 @@ class Model(object):
                learning_rate=gru_lr_x))
        self.fc = fluid.layers.fc(
            input=gru_h0,
            size=vocab_size,
            act='softmax',
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=init_low_bound, high=init_high_bound),
                learning_rate=fc_lr_x))
        cost = fluid.layers.cross_entropy(input=self.fc, label=self.dst_wordseq)
        self.acc = fluid.layers.accuracy(

@@ -59,7 +62,6 @@ class Model(object):
        self.startup_program = fluid.default_startup_program()


model = Model()
model.gru4rec_network()

@@ -69,7 +71,8 @@ job_generator.set_optimizer(optimizer)
job_generator.set_losses([model.loss])
job_generator.set_startup_program(model.startup_program)
job_generator.set_infer_feed_and_target_names(
    [model.src_wordseq.name, model.dst_wordseq.name],
    [model.loss.name, model.acc.name])

build_strategy = FLStrategyFactory()
build_strategy.fed_avg = True
paddle_fl/examples/gru4rec_demo/fl_scheduler.py

@@ -3,7 +3,7 @@ from paddle_fl.core.scheduler.agent_master import FLScheduler
worker_num = 4
server_num = 1
# Define the number of worker/server and the port for scheduler
scheduler = FLScheduler(worker_num, server_num, port=9091)
scheduler.set_sample_worker_num(4)
scheduler.init_env()
print("init env done.")
paddle_fl/examples/gru4rec_demo/fl_server.py

@@ -21,7 +21,7 @@ server_id = 0
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_server_job(job_path, server_id)
job._scheduler_ep = "127.0.0.1:9091"  # IP address for scheduler
server.set_server_job(job)
server._current_ep = "127.0.0.1:8181"  # IP address for server
server.start()
paddle_fl/examples/gru4rec_demo/fl_trainer.py

@@ -6,21 +6,26 @@ import numpy as np
import sys
import os
import logging

logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%M-%Y %H:%M:%S",
    level=logging.DEBUG)

trainer_id = int(sys.argv[1])  # trainer id for each guest
place = fluid.CPUPlace()
train_file_dir = "mid_data/node4/%d/" % trainer_id
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # Inform the scheduler IP to trainer
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
trainer.start()

r = Gru4rec_Reader()
train_reader = r.reader(train_file_dir, place, batch_size=125)

output_folder = "model_node4"
step_i = 0

@@ -30,8 +35,7 @@ while not trainer.stop():
    train_step = 0
    for data in train_reader():
        #print(np.array(data['src_wordseq']))
        ret_avg_cost = trainer.run(feed=data, fetch=["mean_0.tmp_0"])
        train_step += 1
        if train_step == trainer._step:
            break
paddle_fl/examples/k8s_deployment/master/fl_master.py

@@ -5,6 +5,7 @@ import paddle_fl as fl
from paddle_fl.core.master.job_generator import JobGenerator
from paddle_fl.core.strategy.fl_strategy_base import FLStrategyFactory


def parse_args():
    parser = argparse.ArgumentParser(description="master")
    parser.add_argument(

@@ -12,7 +13,7 @@ def parse_args():
        type=int,
        default=2,
        help='number of trainer(default: 2)')
    return parser.parse_args()

@@ -25,7 +26,8 @@ class Model(object):
        self.fc1 = fluid.layers.fc(input=self.concat, size=256, act='relu')
        self.fc2 = fluid.layers.fc(input=self.fc1, size=128, act='relu')
        self.predict = fluid.layers.fc(input=self.fc2, size=2, act='softmax')
        self.sum_cost = fluid.layers.cross_entropy(
            input=self.predict, label=label)
        self.accuracy = fluid.layers.accuracy(input=self.predict, label=label)
        self.loss = fluid.layers.reduce_mean(self.sum_cost)
        self.startup_program = fluid.default_startup_program()

@@ -47,8 +49,8 @@ optimizer = fluid.optimizer.SGD(learning_rate=0.1)
job_generator.set_optimizer(optimizer)
job_generator.set_losses([model.loss])
job_generator.set_startup_program(model.startup_program)
job_generator.set_infer_feed_and_target_names(
    [x.name for x in inputs], [model.predict.name])

build_strategy = FLStrategyFactory()
build_strategy.fed_avg = True

@@ -57,7 +59,8 @@ strategy = build_strategy.create_fl_strategy()
# endpoints will be collected through the cluster
# in this example, we suppose endpoints have been collected
server_service_ip = os.environ['FL_SERVER_SERVICE_HOST'] + ":" + os.environ[
    'FL_SERVER_SERVICE_PORT_FL_SERVER']
service_endpoints = [server_service_ip]
pod_endpoints = ["0.0.0.0:8181"]
output = "fl_job_config"

@@ -68,4 +71,8 @@ num_trainer = args.trainer_num
# fl_job_config will be dispatched to workers
job_generator.generate_fl_job_for_k8s(
    strategy,
    server_pod_endpoints=pod_endpoints,
    server_service_endpoints=service_endpoints,
    worker_num=2,
    output=output)
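The master assembles the server endpoint from the environment variables Kubernetes injects for the server Service. A sketch of that lookup, with placeholder values so it also runs outside a cluster:

import os

# Kubernetes sets these for the fl-server Service; default them here
# (placeholder values) so the sketch runs anywhere.
os.environ.setdefault('FL_SERVER_SERVICE_HOST', '10.0.0.1')
os.environ.setdefault('FL_SERVER_SERVICE_PORT_FL_SERVER', '8181')

server_service_ip = os.environ['FL_SERVER_SERVICE_HOST'] + ":" + os.environ[
    'FL_SERVER_SERVICE_PORT_FL_SERVER']
print(server_service_ip)  # e.g. 10.0.0.1:8181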
paddle_fl/examples/k8s_deployment/scheduler/fl_scheduler.py

import argparse
from paddle_fl.core.scheduler.agent_master import FLScheduler


def parse_args():
    parser = argparse.ArgumentParser(description="scheduler")
    parser.add_argument(

@@ -11,12 +12,13 @@ def parse_args():
    return parser.parse_args()


args = parse_args()
num_trainer = args.trainer_num
worker_num = num_trainer
server_num = 1
# Define the number of worker/server and the port for scheduler
scheduler = FLScheduler(worker_num, server_num, port=9091)
scheduler.set_sample_worker_num(worker_num)
scheduler.init_env()
print("init env done.")
paddle_fl/examples/k8s_deployment/server/fl_server.py

@@ -23,10 +23,12 @@ server_id = 0
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_server_job(job_path, server_id)
job._scheduler_ep = os.environ['FL_SCHEDULER_SERVICE_HOST'] + ":" + os.environ[
    'FL_SCHEDULER_SERVICE_PORT_FL_SCHEDULER']  # IP address for scheduler
#job._endpoints = os.environ['POD_IP'] + ":" + os.environ['FL_SERVER_SERVICE_PORT_FL_SERVER'] # IP address for server
server.set_server_job(job)
server._current_ep = os.environ['FL_SERVER_SERVICE_HOST'] + ":" + os.environ[
    'FL_SERVER_SERVICE_PORT_FL_SERVER']  # IP address for server
print(job._scheduler_ep, server._current_ep)
server.start()
print("connect")
paddle_fl/examples/k8s_deployment/trainer0/fl_trainer.py

@@ -5,7 +5,12 @@ import sys
import os
import logging
import time

logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%M-%Y %H:%M:%S",
    level=logging.DEBUG)


def reader():

@@ -16,15 +21,18 @@ def reader():
        data_dict["label"] = np.random.randint(2, size=(1, 1)).astype('int64')
        yield data_dict


trainer_id = int(sys.argv[1])  # trainer id for each guest
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
#job._scheduler_ep = "127.0.0.1:9091" # Inform the scheduler IP to trainer
job._scheduler_ep = os.environ['FL_SCHEDULER_SERVICE_HOST'] + ":" + os.environ[
    'FL_SCHEDULER_SERVICE_PORT_FL_SCHEDULER']
trainer = FLTrainerFactory().create_fl_trainer(job)
#trainer._current_ep = "127.0.0.1:{}".format(9000+trainer_id)
trainer._current_ep = os.environ['TRAINER0_SERVICE_HOST'] + ":" + os.environ[
    'TRAINER0_SERVICE_PORT_TRAINER0']
trainer.start()
print(trainer._scheduler_ep, trainer._current_ep)
output_folder = "fl_model"

@@ -40,4 +48,3 @@ while not trainer.stop():
    epoch_id += 1
    if epoch_id % 5 == 0:
        trainer.save_inference_program(output_folder)
paddle_fl/examples/k8s_deployment/trainer1/fl_trainer.py

@@ -5,7 +5,12 @@ import sys
import os
import logging
import time

logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%M-%Y %H:%M:%S",
    level=logging.DEBUG)


def reader():

@@ -16,15 +21,18 @@ def reader():
        data_dict["label"] = np.random.randint(2, size=(1, 1)).astype('int64')
        yield data_dict


trainer_id = int(sys.argv[1])  # trainer id for each guest
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
#job._scheduler_ep = "127.0.0.1:9091" # Inform the scheduler IP to trainer
job._scheduler_ep = os.environ['FL_SCHEDULER_SERVICE_HOST'] + ":" + os.environ[
    'FL_SCHEDULER_SERVICE_PORT_FL_SCHEDULER']
trainer = FLTrainerFactory().create_fl_trainer(job)
#trainer._current_ep = "127.0.0.1:{}".format(9000+trainer_id)
trainer._current_ep = os.environ['TRAINER1_SERVICE_HOST'] + ":" + os.environ[
    'TRAINER1_SERVICE_PORT_TRAINER1']
trainer.start()
print(trainer._scheduler_ep, trainer._current_ep)
output_folder = "fl_model"

@@ -40,4 +48,3 @@ while not trainer.stop():
    epoch_id += 1
    if epoch_id % 5 == 0:
        trainer.save_inference_program(output_folder)
paddle_fl/examples/secagg_demo/fl_master.py

@@ -3,6 +3,7 @@ import paddle_fl as fl
from paddle_fl.core.master.job_generator import JobGenerator
from paddle_fl.core.strategy.fl_strategy_base import FLStrategyFactory


class Model(object):
    def __init__(self):
        pass

@@ -14,12 +15,17 @@ class Model(object):
        param_attrs = fluid.ParamAttr(
            name="fc_0.w_0",
            initializer=fluid.initializer.ConstantInitializer(0.0))
        self.predict = fluid.layers.fc(
            input=inputs, size=10, act='softmax', param_attr=param_attrs)
        self.sum_cost = fluid.layers.cross_entropy(
            input=self.predict, label=label)
        self.loss = fluid.layers.mean(self.sum_cost)
        self.accuracy = fluid.layers.accuracy(input=self.predict, label=label)
        self.startup_program = fluid.default_startup_program()


inputs = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='y', shape=[1], dtype='int64')

@@ -31,15 +37,16 @@ optimizer = fluid.optimizer.SGD(learning_rate=0.01)
job_generator.set_optimizer(optimizer)
job_generator.set_losses([model.loss])
job_generator.set_startup_program(model.startup_program)
job_generator.set_infer_feed_and_target_names([inputs.name, label.name],
                                              [model.loss.name])

build_strategy = FLStrategyFactory()
#build_strategy.fed_avg = True
build_strategy.sec_agg = True
param_name_list = []
param_name_list.append("fc_0.w_0.opti.trainer_")  # need trainer_id when running
param_name_list.append("fc_0.b_0.opti.trainer_")
build_strategy.param_name_list = param_name_list
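param_name_list above registers the secure-aggregation parameter names with a trailing "trainer_" stub; per the inline comment, the trainer id is appended at run time. A sketch of how such names would be completed (the suffixing itself is an assumption about the runtime, not code from this commit):

param_name_list = ["fc_0.w_0.opti.trainer_", "fc_0.b_0.opti.trainer_"]

trainer_id = 0  # supplied when the trainer starts
per_trainer_names = [name + str(trainer_id) for name in param_name_list]
print(per_trainer_names)  # ['fc_0.w_0.opti.trainer_0', 'fc_0.b_0.opti.trainer_0']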
paddle_fl/examples/secagg_demo/fl_scheduler.py

@@ -3,7 +3,7 @@ from paddle_fl.core.scheduler.agent_master import FLScheduler
worker_num = 2
server_num = 1
scheduler = FLScheduler(worker_num, server_num, port=9091)
scheduler.set_sample_worker_num(worker_num)
scheduler.init_env()
print("init env done.")
paddle_fl/examples/secagg_demo/fl_server.py

@@ -21,8 +21,8 @@ server_id = 0
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_server_job(job_path, server_id)
job._scheduler_ep = "127.0.0.1:9091"  # IP address for scheduler
server.set_server_job(job)
server._current_ep = "127.0.0.1:8181"  # IP address for server
server.start()
print("connect")
paddle_fl/examples/secagg_demo/keys/0_pub_key.txt

2438748580808349511143047663636683775879288034436941526695550498623461587527621346172907651006831789701999970929529915459467532662545948308044143788306668377086821294492459623439935894424167712515718436351900091777957477710004777078638317806960364609629258387413979203403741893205419691425902518810085451041187685334971769087054033027561974230347468587825700834108657561999305482311897914109364221430821533207693682979777541616125499682380618775029176238891407643926372043660610226672413497764635239787000143341827693941253721638947580506197728500367325524850325027980531066702962726949006217630290236644410746181942256812170056772600756232506116738493114591218127323741133163913140583529684827066023347088796194846253682954154504336640429027403657831470993825621749318372332546269811820953216261135662418531598954663771775691648448615131802158937156803324423733802071166119966224716088242291968098450309032800335049617861465

(The key value is unchanged; the only change is that the file now ends with a trailing newline.)
paddle_fl/examples/secagg_demo/keys/1_pub_key.txt

2514645349791449916465355128335954929464444612258498884322250411584328344530925790221013632799576102047787468232441470392901627580493383471719532612816534848391408601920539266665550346246343040368608757429591392784807798812848893304745441721044204602414415725300562075953290154457726382683020925406187524758708694161689285261293920782115270550960717687322240202298426363733065475031448888618026711435993053991653780694485897784243346859087028197560255857091562150381619708471192080403868115055265681423866891707972356449212236920210992128063075734725292359699578940877165584175851667803049667020005978253573912096358469050409541237248593428640028573437783747958382446666712845099931003578559688134007238753677011257181086592677636834099341020870502521085827878680362572013623469761170961943916356175726242515624843837354222489536469472365937707615959657846428001149463801728084949088483783942894784080451399167982957006474558

(The key value is unchanged; the only change is that the file now ends with a trailing newline.)
paddle_fl/examples/submitter_demo/conf.txt

@@ -21,4 +21,3 @@ server=yq01-hpc-lvliang01-smart-master.dmop.baidu.com
python_tar=./python.tar.gz
wheel=./paddlepaddle-0.0.0-cp27-cp27mu-linux_x86_64.whl
paddle_fl/examples/submitter_demo/model.py

import paddle.fluid as fluid


class Model(object):
    def __init__(self):
        pass

@@ -9,8 +10,8 @@ class Model(object):
        self.fc1 = fluid.layers.fc(input=self.concat, size=256, act='relu')
        self.fc2 = fluid.layers.fc(input=self.fc1, size=128, act='relu')
        self.predict = fluid.layers.fc(input=self.fc2, size=2, act='softmax')
        self.sum_cost = fluid.layers.cross_entropy(
            input=self.predict, label=label)
        self.accuracy = fluid.layers.accuracy(input=self.predict, label=label)
        self.loss = fluid.layers.reduce_mean(self.sum_cost)
        self.startup_program = fluid.default_startup_program()
paddle_fl/examples/submitter_demo/scheduler_client.py

@@ -49,6 +49,7 @@ default_dict = {
    "wheel": "./paddlepaddle-0.0.0-cp27-cp27mu-linux_x86_64-0.whl"
}


def load_conf(conf_file, local_dict):
    with open(conf_file) as fin:
        for line in fin:

@@ -58,6 +59,7 @@ def load_conf(conf_file, local_dict):
            local_dict[group[0]] = group[1]
    return local_dict


client = HPCClient()
default_dict = load_conf(sys.argv[1], default_dict)

@@ -94,9 +96,11 @@ all_ips_ready = False
ip_list = []
scheduler = FLScheduler(
    int(default_dict["worker_nodes"]),
    int(default_dict["server_nodes"]),
    port=random_port,
    socket=zmq_socket)
scheduler.set_sample_worker_num(int(default_dict["worker_nodes"]))

@@ -121,12 +125,14 @@ print(ip_list)
#allocate the role of each endpoint and their ids
ip_role = {}
for i in range(len(ip_list)):
    if i < int(default_dict["server_nodes"]):
        ip_role[ip_list[i]] = 'server%d' % i
    else:
        ip_role[ip_list[i]] = 'trainer%d' % (
            i - int(default_dict["server_nodes"]))
print(ip_role)


def job_generate():
    #generate a fl job which is the same as fl_master
    inputs = [fluid.layers.data( \

@@ -146,8 +152,8 @@ def job_generate():
    job_generator.set_optimizer(optimizer)
    job_generator.set_losses([model.loss])
    job_generator.set_startup_program(model.startup_program)
    job_generator.set_infer_feed_and_target_names(
        [x.name for x in inputs], [model.predict.name])

    build_strategy = FLStrategyFactory()
    build_strategy.fed_avg = True

@@ -157,20 +163,24 @@ def job_generate():
    # endpoints will be collected through the cluster
    # in this example, we suppose endpoints have been collected
    server_ip = ["{}".format(ip_list[0])]
    output = "job_config"
    job_generator.generate_fl_job(
        strategy,
        server_endpoints=server_ip,
        worker_num=int(default_dict["worker_nodes"]),
        output=output)

    file_list = os.listdir(output)
    for file in file_list:
        tar = tarfile.open('{}/{}.tar.gz'.format(output, file), 'w:gz')
        for root, dir, files in os.walk("{}/{}".format(output, file)):
            for f in files:
                fullpath = os.path.join(root, f)
                tar.add(fullpath)
        tar.close()


job_generate()

#send the allocated rolls to the remote endpoints
paddle_fl/examples/submitter_demo/train_program.py

@@ -13,7 +13,6 @@ import sys
import logging
import time

random_port = 60001
scheduler_conf = {}

@@ -31,8 +30,7 @@ download_url = "{}:8080".format(scheduler_ip[0])
print(download_url)
context = zmq.Context()
zmq_socket = context.socket(zmq.REQ)
zmq_socket.connect("tcp://{}".format(scheduler_conf["ENDPOINT"]))
zmq_socket.send("ENDPOINT\t{}".format(endpoint))
message = zmq_socket.recv()
print(message)

@@ -47,7 +45,7 @@ while True:
    if group[0] == "WAIT":
        continue
    else:
        os.system("wget {}/job_config/{}.tar.gz".format(download_url, message))
        print(message)
        break

@@ -71,6 +69,7 @@ if 'server' in message:
    server._current_ep = endpoint
    server.start()
else:

    def reader():
        for i in range(1000):
            data_dict = {}

@@ -96,7 +95,7 @@ else:
        for data in reader():
            trainer.run(feed=data, fetch=[])
            step_i += 1
            if step_i == trainer._step:
                break
        epoch_id += 1
        if epoch_id % 5 == 0:
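train_program.py registers its endpoint with the scheduler over ZeroMQ and polls until it is assigned a role, then downloads the matching job-config tarball. A compressed sketch of that handshake; the endpoints and the poll message are illustrative assumptions, since the repeated request inside the loop is elided in the diff:

import os
import zmq

endpoint = "127.0.0.1:60001"    # this node's address (illustrative)
download_url = "10.0.0.2:8080"  # scheduler's file server (illustrative)

context = zmq.Context()
zmq_socket = context.socket(zmq.REQ)
zmq_socket.connect("tcp://10.0.0.2:60001")  # scheduler endpoint (illustrative)
zmq_socket.send("ENDPOINT\t{}".format(endpoint).encode())
print(zmq_socket.recv())  # acknowledgement

while True:
    zmq_socket.send(b"GET_ROLE")  # hypothetical poll message
    message = zmq_socket.recv().decode()
    if message.split("\t")[0] == "WAIT":
        continue
    # the reply names this node's role; fetch the matching job config
    os.system("wget {}/job_config/{}.tar.gz".format(download_url, message))
    break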