提交 23bf60fc 编写于 作者: Q qjing666

update femnist dataset

上级 cd696e5d
import requests
import os
import json
import tarfile
import random
# Base URL hosting the LEAF FEMNIST archives.
url = "https://paddlefl.bj.bcebos.com/leaf/"
# Directory the archive extracts into; its existence is used by
# train()/test() as the "data already prepared" marker.
target_path = "femnist_data"
# Local filename of the downloaded archive ("femnist_data.tar.gz").
tar_path = target_path+".tar.gz"
# NOTE(review): debug print executed at import time — consider removing.
print(tar_path)
def download(url):
    """Download *url* into the module-level ``tar_path`` archive file.

    Args:
        url: full URL of the ``.tar.gz`` archive to fetch.

    Raises:
        requests.HTTPError: if the server responds with an error status
            (previously an error page would be silently written to disk).
    """
    # Stream the response so the whole archive is never held in memory.
    r = requests.get(url, stream=True)
    r.raise_for_status()
    with open(tar_path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1 << 20):
            f.write(chunk)
def extract(tar_path):
    """Extract the gzipped tar archive at *tar_path* into the CWD.

    Args:
        tar_path: path to a ``.tar.gz`` archive.

    NOTE(review): members are extracted without path sanitisation, so an
    untrusted archive could write outside the CWD ("tar slip"). The
    archive here comes from the project's own download URL.
    """
    # Context manager guarantees the archive is closed even on error;
    # extractall() replaces the manual per-member loop.
    with tarfile.open(tar_path, "r:gz") as tar:
        tar.extractall()
def train(trainer_id, inner_step, batch_size, count_by_step):
    """Return a generator factory for one trainer's FEMNIST training data.

    Downloads and extracts the dataset on first use. Each call of the
    returned ``train_data`` generator picks one random user from this
    trainer's shard and yields ``(image, label)`` pairs for that user.

    Args:
        trainer_id: index selecting the json shard to read.
        inner_step: local steps per round; with ``count_by_step`` the
            generator yields ``inner_step * batch_size`` samples, cycling
            over the chosen user's data.
        batch_size: samples per step (used only with ``count_by_step``).
        count_by_step: if True yield a fixed sample count; otherwise
            yield each of the user's samples exactly once.
    """
    if not os.path.exists(target_path):
        print("Preparing data...")
        if not os.path.exists(tar_path):
            download(url + tar_path)
        extract(tar_path)

    def train_data():
        # Load the shard and close the file immediately; the original
        # only closed it when the generator was fully exhausted.
        with open("./femnist_data/train/all_data_%d_niid_0_keep_0_train_9.json" % trainer_id, 'r') as train_file:
            json_train = json.load(train_file)
        users = json_train["users"]
        # Randomly choose one user from this trainer's shard per call.
        cur_user = users[random.randrange(0, len(users))]
        print('training using ' + cur_user)
        train_images = json_train["user_data"][cur_user]['x']
        train_labels = json_train["user_data"][cur_user]['y']
        if count_by_step:
            # range (not Python-2-only xrange) so this runs on Python 3.
            for i in range(inner_step * batch_size):
                yield train_images[i % len(train_images)], train_labels[i % len(train_images)]
        else:
            for i in range(len(train_images)):
                yield train_images[i], train_labels[i]

    return train_data
def test(trainer_id, inner_step, batch_size, count_by_step):
    """Return a generator factory for one trainer's FEMNIST test data.

    Downloads and extracts the dataset on first use. The returned
    ``test_data`` generator yields ``(image, label)`` pairs for every
    user in this trainer's test shard.

    Args:
        trainer_id: index selecting the json shard to read.
        inner_step: kept for signature symmetry with train(); unused.
        batch_size: kept for signature symmetry with train(); unused.
        count_by_step: kept for signature symmetry with train(); unused.
    """
    if not os.path.exists(target_path):
        print("Preparing data...")
        if not os.path.exists(tar_path):
            download(url + tar_path)
        extract(tar_path)

    def test_data():
        # Load the shard and close the file immediately; the original
        # only closed it when the generator was fully exhausted.
        with open("./femnist_data/test/all_data_%d_niid_0_keep_0_test_9.json" % trainer_id, 'r') as test_file:
            json_test = json.load(test_file)
        users = json_test["users"]
        for user in users:
            test_images = json_test['user_data'][user]['x']
            test_labels = json_test['user_data'][user]['y']
            # range (not Python-2-only xrange) so this runs on Python 3.
            for i in range(len(test_images)):
                yield test_images[i], test_labels[i]

    return test_data
from paddle_fl.core.trainer.fl_trainer import FLTrainerFactory
from paddle_fl.core.master.fl_job import FLRunTimeJob
import paddle_fl.dataset.femnist
import numpy
import sys
import paddle
import paddle.fluid as fluid
import logging
import math
import random
import json
logging.basicConfig(filename="test.log", filemode="w", format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%d-%M-%Y %H:%M:%S", level=logging.DEBUG)
......@@ -22,36 +21,6 @@ trainer.start()
# Log how many local steps the trainer runs per round.
print(trainer._step)
# Clone the trainer's main program configured for evaluation
# (for_test=True), so testing does not disturb the training program.
test_program = trainer._main_program.clone(for_test=True)
def data_generater(trainer_id, inner_step, batch_size, count_by_step):
    """Build ``(train_data, test_data)`` generator factories for one trainer.

    Reads this trainer's FEMNIST json shards once, picks one random user
    for training, and returns two zero-argument generator functions.

    Args:
        trainer_id: index selecting the json shards to read.
        inner_step: local steps per round; with ``count_by_step`` the
            train generator yields ``inner_step * batch_size`` samples.
        batch_size: samples per step (used only with ``count_by_step``).
        count_by_step: if True the train generator yields a fixed sample
            count, cycling over the user's data; otherwise each sample once.

    Returns:
        Tuple of generator functions yielding ``(image, label)`` pairs.
    """
    # Load both shards up front and close the files immediately; the
    # original left the handles open until the end of this function.
    with open("./femnist_data/train/all_data_%d_niid_0_keep_0_train_9.json" % trainer_id, 'r') as train_file:
        json_train = json.load(train_file)
    with open("./femnist_data/test/all_data_%d_niid_0_keep_0_test_9.json" % trainer_id, 'r') as test_file:
        json_test = json.load(test_file)
    users = json_train["users"]
    # Randomly pick one user; all training this round uses that user's data.
    cur_user = users[random.randrange(0, len(users))]
    print('training using ' + cur_user)

    def train_data():
        train_images = json_train["user_data"][cur_user]['x']
        train_labels = json_train["user_data"][cur_user]['y']
        if count_by_step:
            # range (not Python-2-only xrange); cycle over the user's data.
            for i in range(inner_step * batch_size):
                yield train_images[i % len(train_images)], train_labels[i % len(train_images)]
        else:
            for i in range(len(train_images)):
                yield train_images[i], train_labels[i]

    def test_data():
        # Evaluate on every user's held-out samples.
        for user in users:
            test_images = json_test['user_data'][user]['x']
            test_labels = json_test['user_data'][user]['y']
            for i in range(len(test_images)):
                yield test_images[i], test_labels[i]

    return train_data, test_data
# Input placeholders: a 1x28x28 float image and an int64 class label.
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# Feeder converts (image, label) python data into fluid tensors on CPU.
feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())
......@@ -67,13 +36,6 @@ def train_test(train_test_program, train_test_feed, train_test_reader):
acc_val_mean = numpy.array(acc_set).mean()
return acc_val_mean
def compute_privacy_budget(sample_ratio, epsilon, step, delta):
    """Print the accumulated (epsilon, delta) differential-privacy budget.

    NOTE(review): uses the simple estimate
    E = 2 * epsilon * sqrt(step * sample_ratio) — confirm it matches the
    DP accounting actually used by the training job.
    """
    sampled_steps = math.sqrt(step * sample_ratio)
    E = 2 * epsilon * sampled_steps
    print("({0}, {1})-DP".format(E, delta))
# Training-loop state: current epoch, global step counter, and the
# total number of epochs to run.
epoch_id = 0
step = 0
epoch = 3000
......@@ -90,13 +52,15 @@ while not trainer.stop():
if epoch_id > epoch:
break
print("epoch %d start train" % (epoch_id))
train_data,test_data= data_generater(trainer_id,inner_step=trainer._step,batch_size=64,count_by_step=count_by_step)
#train_data,test_data= data_generater(trainer_id,inner_step=trainer._step,batch_size=64,count_by_step=count_by_step)
train_reader = paddle.batch(
paddle.reader.shuffle(train_data, buf_size=500),
paddle.reader.shuffle(paddle_fl.dataset.femnist.train(trainer_id,inner_step=trainer._step,batch_size=64,count_by_step=count_by_step), buf_size=500),
batch_size=64)
test_reader = paddle.batch(
test_data, batch_size=64)
paddle_fl.dataset.femnist.test(trainer_id,inner_step=trainer._step,batch_size=64,count_by_step=count_by_step), batch_size=64)
if count_by_step:
for step_id, data in enumerate(train_reader()):
acc = trainer.run(feeder.feed(data), fetch=["accuracy_0.tmp_0"])
......@@ -116,7 +80,6 @@ while not trainer.stop():
train_test_feed=feeder)
print("Test with epoch %d, accuracy: %s" % (epoch_id, acc_val))
compute_privacy_budget(sample_ratio=0.001, epsilon=0.1, step=step, delta=0.00001)
if trainer_id == 0:
save_dir = (output_folder + "/epoch_%d") % epoch_id
trainer.save_inference_program(output_folder)
# Previous manual launch steps, kept commented for reference:
#killall python
#python fl_master.py
#sleep 2
# Start the FL parameter server in the background, logging to log/server0.log.
python -u fl_server.py >log/server0.log &
# Compile and distribute the FL job description.
python fl_master.py
sleep 2
# Start the scheduler that coordinates trainers, logging to scheduler.log.
python -u fl_scheduler.py >scheduler.log &
sleep 2
......
......@@ -29,7 +29,7 @@ def python_version():
max_version, mid_version, min_version = python_version()
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.1.0','paddlepaddle >= 1.6'
'six >= 1.10.0', 'protobuf >= 3.1.0','paddlepaddle >= 1.6',
]
if max_version < 3:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册