PaddlePaddle / PaddleFL
Commit 23bf60fc
Authored Nov 25, 2019 by qjing666

update femnist dataset

Parent: cd696e5d
Changes: 4 changed files with 78 additions and 47 deletions (+78 -47)

paddle_fl/dataset/femnist.py                     +70   -0
paddle_fl/examples/femnist_demo/fl_trainer.py     +6  -43
paddle_fl/examples/femnist_demo/run.sh            +1   -3
setup.py                                          +1   -1
paddle_fl/dataset/femnist.py (new file, 0 → 100644)
import requests
import os
import json
import tarfile
import random

url = "https://paddlefl.bj.bcebos.com/leaf/"
target_path = "femnist_data"
tar_path = target_path + ".tar.gz"
print(tar_path)


def download(url):
    r = requests.get(url)
    with open(tar_path, 'wb') as f:
        f.write(r.content)


def extract(tar_path):
    tar = tarfile.open(tar_path, "r:gz")
    file_names = tar.getnames()
    for file_name in file_names:
        tar.extract(file_name)
    tar.close()


def train(trainer_id, inner_step, batch_size, count_by_step):
    if not os.path.exists(target_path):
        print("Preparing data...")
        if not os.path.exists(tar_path):
            download(url + tar_path)
        extract(tar_path)

    def train_data():
        train_file = open(
            "./femnist_data/train/all_data_%d_niid_0_keep_0_train_9.json" %
            trainer_id, 'r')
        json_train = json.load(train_file)
        users = json_train["users"]
        rand = random.randrange(0, len(users))
        # random choose a user from each trainer
        cur_user = users[rand]
        print('training using ' + cur_user)
        train_images = json_train["user_data"][cur_user]['x']
        train_labels = json_train["user_data"][cur_user]['y']
        if count_by_step:
            for i in xrange(inner_step * batch_size):
                yield train_images[i % (len(train_images))], train_labels[i % (len(train_images))]
        else:
            for i in xrange(len(train_images)):
                yield train_images[i], train_labels[i]
        train_file.close()

    return train_data


def test(trainer_id, inner_step, batch_size, count_by_step):
    if not os.path.exists(target_path):
        print("Preparing data...")
        if not os.path.exists(tar_path):
            download(url + tar_path)
        extract(tar_path)

    def test_data():
        test_file = open(
            "./femnist_data/test/all_data_%d_niid_0_keep_0_test_9.json" %
            trainer_id, 'r')
        json_test = json.load(test_file)
        users = json_test["users"]
        for user in users:
            test_images = json_test['user_data'][user]['x']
            test_labels = json_test['user_data'][user]['y']
            for i in xrange(len(test_images)):
                yield test_images[i], test_labels[i]
        test_file.close()

    return test_data
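A minimal usage sketch (not part of the commit) for the module above. Calling train() or test() downloads and extracts femnist_data on first use; note the generators use xrange, so the code as written assumes Python 2.

import paddle_fl.dataset.femnist as femnist

# Hypothetical usage: a reader for trainer 0 that yields inner_step * batch_size
# samples drawn from one randomly chosen FEMNIST user.
reader = femnist.train(trainer_id=0, inner_step=10, batch_size=64, count_by_step=True)
for image, label in reader():
    # image is the flattened pixel list stored under 'x', label the class id under 'y'
    pass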
paddle_fl/examples/femnist_demo/fl_trainer.py
from paddle_fl.core.trainer.fl_trainer import FLTrainerFactory
from paddle_fl.core.master.fl_job import FLRunTimeJob
import paddle_fl.dataset.femnist
import numpy
import sys
import paddle
import paddle.fluid as fluid
import logging
import math
import random
import json

logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%M-%Y %H:%M:%S",
    level=logging.DEBUG)
...
...
@@ -22,36 +21,6 @@ trainer.start()
(Judging from the hunk header above, 36 old lines becoming 6, and this file's +6/-43 summary, the inline data_generater helper below appears to be the removed side of this hunk; its logic has moved into the new paddle_fl/dataset/femnist.py.)

print(trainer._step)
test_program = trainer._main_program.clone(for_test=True)


def data_generater(trainer_id, inner_step, batch_size, count_by_step):
    train_file = open(
        "./femnist_data/train/all_data_%d_niid_0_keep_0_train_9.json" %
        trainer_id, 'r')
    test_file = open(
        "./femnist_data/test/all_data_%d_niid_0_keep_0_test_9.json" %
        trainer_id, 'r')
    json_train = json.load(train_file)
    json_test = json.load(test_file)
    users = json_train["users"]
    rand = random.randrange(0, len(users))
    # random choose a user from each trainer
    cur_user = users[rand]
    print('training using ' + cur_user)

    def train_data():
        train_images = json_train["user_data"][cur_user]['x']
        train_labels = json_train["user_data"][cur_user]['y']
        if count_by_step:
            for i in xrange(inner_step * batch_size):
                yield train_images[i % (len(train_images))], train_labels[i % (len(train_images))]
        else:
            for i in xrange(len(train_images)):
                yield train_images[i], train_labels[i]

    def test_data():
        for user in users:
            test_images = json_test['user_data'][user]['x']
            test_labels = json_test['user_data'][user]['y']
            for i in xrange(len(test_images)):
                yield test_images[i], test_labels[i]

    train_file.close()
    test_file.close()
    return train_data, test_data


img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())
...
...
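A minimal standalone sketch (an illustration, not part of this commit) of the sample format the DataFeeder above consumes: one (image, label) tuple per sample. In the real script the images come from the femnist readers' flattened 'x' entries; here a zero image of the declared shape is used as a hypothetical stand-in.

import numpy
import paddle.fluid as fluid

img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())

# A hypothetical batch of 64 blank samples in (image, label) form.
fake_batch = [(numpy.zeros((1, 28, 28), dtype='float32'), 0)] * 64
feed_dict = feeder.feed(fake_batch)  # maps 'img' and 'label' to LoDTensors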
@@ -67,13 +36,6 @@ def train_test(train_test_program, train_test_feed, train_test_reader):
    acc_val_mean = numpy.array(acc_set).mean()
    return acc_val_mean


def compute_privacy_budget(sample_ratio, epsilon, step, delta):
    E = 2 * epsilon * math.sqrt(step * sample_ratio)
    print("({0}, {1})-DP".format(E, delta))
epoch_id = 0
step = 0
epoch = 3000
...
...
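As a quick worked check of compute_privacy_budget (illustrative numbers, not from the commit): with the values passed later in this script, sample_ratio=0.001, epsilon=0.1 and delta=0.00001, a run that has reached step=100 would report E = 2 * 0.1 * sqrt(100 * 0.001) ≈ 0.063, i.e. roughly "(0.063, 1e-05)-DP".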
@@ -90,13 +52,15 @@ while not trainer.stop():
(Old and new lines appear interleaved below: the data_generater-based readers seem to be the removed side, replaced by readers built from paddle_fl.dataset.femnist.)

    if epoch_id > epoch:
        break
    print("epoch %d start train" % (epoch_id))
    train_data, test_data = data_generater(trainer_id, inner_step=trainer._step, batch_size=64, count_by_step=count_by_step)
    # train_data,test_data= data_generater(trainer_id,inner_step=trainer._step,batch_size=64,count_by_step=count_by_step)
    train_reader = paddle.batch(
        paddle.reader.shuffle(train_data, buf_size=500),
        paddle.reader.shuffle(
            paddle_fl.dataset.femnist.train(
                trainer_id, inner_step=trainer._step, batch_size=64,
                count_by_step=count_by_step),
            buf_size=500),
        batch_size=64)
    test_reader = paddle.batch(
        test_data, batch_size=64)
        paddle_fl.dataset.femnist.test(
            trainer_id, inner_step=trainer._step, batch_size=64,
            count_by_step=count_by_step),
        batch_size=64)
    if count_by_step:
        for step_id, data in enumerate(train_reader()):
            acc = trainer.run(feeder.feed(data), fetch=["accuracy_0.tmp_0"])
...
...
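Since the hunk above interleaves old and new lines, here is a cleaned-up sketch (an interpretation, not verbatim from the diff) of how the reader construction looks after this commit; trainer, trainer_id and count_by_step come from the surrounding script.

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle_fl.dataset.femnist.train(
            trainer_id, inner_step=trainer._step, batch_size=64,
            count_by_step=count_by_step),
        buf_size=500),
    batch_size=64)
test_reader = paddle.batch(
    paddle_fl.dataset.femnist.test(
        trainer_id, inner_step=trainer._step, batch_size=64,
        count_by_step=count_by_step),
    batch_size=64)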
@@ -116,7 +80,6 @@ while not trainer.stop():
            train_test_feed=feeder)
        print("Test with epoch %d, accuracy: %s" % (epoch_id, acc_val))
        compute_privacy_budget(
            sample_ratio=0.001, epsilon=0.1, step=step, delta=0.00001)
    if trainer_id == 0:
        save_dir = (output_folder + "/epoch_%d") % epoch_id
        trainer.save_inference_program(output_folder)
paddle_fl/examples/femnist_demo/run.sh
#killall python
#python fl_master.py
#sleep 2
python -u fl_server.py > log/server0.log &
python fl_master.py
sleep 2
python -u fl_scheduler.py > scheduler.log &
sleep 2
...
...
setup.py
...
...
@@ -29,7 +29,7 @@ def python_version():
(The two adjacent REQUIRED_PACKAGES entries below are the old and new versions of the same line; the only visible change is the trailing comma added after 'paddlepaddle >= 1.6'.)

max_version, mid_version, min_version = python_version()

REQUIRED_PACKAGES = [
    'six >= 1.10.0', 'protobuf >= 3.1.0', 'paddlepaddle >= 1.6'
    'six >= 1.10.0', 'protobuf >= 3.1.0', 'paddlepaddle >= 1.6',
]

if max_version < 3:
...
...
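A small aside (not from the commit) on why the trailing comma is worth having: without it, appending another requirement on the next line would silently concatenate two adjacent string literals into a single bogus requirement. The extra package name below is hypothetical.

REQUIRED_PACKAGES = [
    'six >= 1.10.0', 'protobuf >= 3.1.0', 'paddlepaddle >= 1.6'
    'some_new_dep >= 2.0',  # hypothetical entry added without a comma above it
]
print(REQUIRED_PACKAGES[-1])  # prints 'paddlepaddle >= 1.6some_new_dep >= 2.0'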