提交 23bf60fc 编写于 作者: Q qjing666

update femnist dataset

上级 cd696e5d
import requests
import os
import json
import tarfile
import random
# Base URL under which the dataset archive is published; the archive file
# name is appended to it when downloading.
url = "https://paddlefl.bj.bcebos.com/leaf/"

# Local directory whose presence means the dataset is already prepared.
target_path = "femnist_data"

# Archive file name, used both as the remote object name and the local file.
tar_path = "%s.tar.gz" % target_path
print(tar_path)
def download(url):
    """Download ``url`` and store it at the module-level ``tar_path``.

    The body is streamed to a temporary ``<tar_path>.part`` file and only
    renamed to ``tar_path`` once the transfer finished, so an interrupted
    download never leaves a truncated archive that later runs would mistake
    for a complete one.

    Args:
        url: Full URL of the archive to fetch.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts
            or a non-2xx HTTP status.
    """
    partial_path = tar_path + ".part"
    # stream=True avoids holding the whole archive in memory; the timeout
    # bounds connect time and the wait between received chunks.
    response = requests.get(url, stream=True, timeout=60)
    try:
        # Fail loudly instead of saving an HTML error page as the archive.
        response.raise_for_status()
        with open(partial_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    finally:
        response.close()
    os.rename(partial_path, tar_path)
def extract(tar_path):
    """Unpack the gzip-compressed tar archive ``tar_path`` into the cwd.

    Args:
        tar_path: Path of the ``.tar.gz`` archive to unpack.

    Raises:
        ValueError: If a member name would resolve outside the current
            working directory (e.g. absolute paths or ``..`` components).
        tarfile.TarError: If the archive cannot be read.
    """
    dest_root = os.path.realpath(os.getcwd())
    if dest_root.endswith(os.sep):
        dest_prefix = dest_root
    else:
        dest_prefix = dest_root + os.sep

    tar = tarfile.open(tar_path, "r:gz")
    try:
        # The archive comes from the network, so validate every member name
        # before writing anything.
        # NOTE(review): this checks names only; symlink/hardlink members are
        # not inspected.
        for member in tar.getmembers():
            member_path = os.path.realpath(
                os.path.join(dest_root, member.name))
            if (member_path != dest_root
                    and not member_path.startswith(dest_prefix)):
                raise ValueError(
                    "unsafe path in archive: %s" % member.name)
        tar.extractall(dest_root)
    finally:
        # Close the archive even when validation or extraction fails.
        tar.close()
def train(trainer_id, inner_step, batch_size, count_by_step):
    """Return a reader creator over one trainer's training shard.

    If the ``target_path`` directory does not exist, the archive is
    downloaded (unless already present) and extracted first.

    Args:
        trainer_id: Integer id substituted into the shard file name.
        inner_step: Number of steps per round; used only when
            ``count_by_step`` is true.
        batch_size: Samples per step; used only when ``count_by_step`` is
            true.
        count_by_step: If true, the reader yields exactly
            ``inner_step * batch_size`` samples, cycling through the chosen
            user's data; otherwise it yields each sample of that user once.

    Returns:
        A zero-argument generator function. Each invocation re-reads the
        shard, picks one user at random and yields ``(image, label)`` pairs
        for that user.
    """
    if not os.path.exists(target_path):
        print("Preparing data...")
        if not os.path.exists(tar_path):
            download(url + tar_path)
        extract(tar_path)

    def train_data():
        shard_path = os.path.join(
            target_path, "train",
            "all_data_%d_niid_0_keep_0_train_9.json" % trainer_id)
        # Load everything up front so the file is closed even if the
        # consumer stops iterating early.
        with open(shard_path, 'r') as train_file:
            json_train = json.load(train_file)

        users = json_train["users"]
        # Randomly choose one user of this trainer for the whole pass.
        cur_user = users[random.randrange(0, len(users))]
        print('training using ' + cur_user)
        train_images = json_train["user_data"][cur_user]['x']
        train_labels = json_train["user_data"][cur_user]['y']

        if count_by_step:
            num_samples = len(train_images)
            if not num_samples:
                # Nothing to cycle through; avoid a modulo-by-zero.
                return
            # range (not xrange) keeps this runnable on Python 2 and 3.
            for i in range(inner_step * batch_size):
                idx = i % num_samples
                yield train_images[idx], train_labels[idx]
        else:
            for sample in zip(train_images, train_labels):
                yield sample

    return train_data
def test(trainer_id, inner_step, batch_size, count_by_step):
    """Return a reader creator over one trainer's test shard.

    If the ``target_path`` directory does not exist, the archive is
    downloaded (unless already present) and extracted first.

    Args:
        trainer_id: Integer id substituted into the shard file name.
        inner_step: Unused; kept so the signature mirrors ``train``.
        batch_size: Unused; kept so the signature mirrors ``train``.
        count_by_step: Unused; kept so the signature mirrors ``train``.

    Returns:
        A zero-argument generator function yielding ``(image, label)`` pairs
        for every user listed in the shard, in file order.
    """
    if not os.path.exists(target_path):
        print("Preparing data...")
        if not os.path.exists(tar_path):
            download(url + tar_path)
        extract(tar_path)

    def test_data():
        shard_path = os.path.join(
            target_path, "test",
            "all_data_%d_niid_0_keep_0_test_9.json" % trainer_id)
        # Load everything up front so the file is closed even if the
        # consumer stops iterating early.
        with open(shard_path, 'r') as test_file:
            json_test = json.load(test_file)

        for user in json_test["users"]:
            user_samples = json_test['user_data'][user]
            # Direct pairing instead of xrange indexing keeps this runnable
            # on Python 2 and 3.
            for sample in zip(user_samples['x'], user_samples['y']):
                yield sample

    return test_data
from paddle_fl.core.trainer.fl_trainer import FLTrainerFactory from paddle_fl.core.trainer.fl_trainer import FLTrainerFactory
from paddle_fl.core.master.fl_job import FLRunTimeJob from paddle_fl.core.master.fl_job import FLRunTimeJob
import paddle_fl.dataset.femnist
import numpy import numpy
import sys import sys
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import logging import logging
import math import math
import random
import json
logging.basicConfig(filename="test.log", filemode="w", format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%d-%M-%Y %H:%M:%S", level=logging.DEBUG) logging.basicConfig(filename="test.log", filemode="w", format="%(asctime)s %(name)s:%(levelname)s:%(message)s", datefmt="%d-%M-%Y %H:%M:%S", level=logging.DEBUG)
...@@ -22,36 +21,6 @@ trainer.start() ...@@ -22,36 +21,6 @@ trainer.start()
print(trainer._step) print(trainer._step)
test_program = trainer._main_program.clone(for_test=True) test_program = trainer._main_program.clone(for_test=True)
def data_generater(trainer_id, inner_step, batch_size, count_by_step):
    """Build train and test readers from one trainer's FEMNIST shards.

    Both JSON shards are read eagerly when this function is called, and one
    training user is picked at random at that point.

    Args:
        trainer_id: Integer id substituted into the shard file names.
        inner_step: Number of steps per round; used only when
            ``count_by_step`` is true.
        batch_size: Samples per step; used only when ``count_by_step`` is
            true.
        count_by_step: If true, the train reader yields exactly
            ``inner_step * batch_size`` samples, cycling through the chosen
            user's data; otherwise it yields each of that user's samples once.

    Returns:
        A ``(train_data, test_data)`` tuple of zero-argument generator
        functions yielding ``(image, label)`` pairs.
    """
    train_path = "./femnist_data/train/all_data_%d_niid_0_keep_0_train_9.json" % trainer_id
    test_path = "./femnist_data/test/all_data_%d_niid_0_keep_0_test_9.json" % trainer_id
    # Context managers close both files even if parsing fails.
    with open(train_path, 'r') as train_file:
        json_train = json.load(train_file)
    with open(test_path, 'r') as test_file:
        json_test = json.load(test_file)

    users = json_train["users"]
    # Randomly choose one user of this trainer for training.
    cur_user = users[random.randrange(0, len(users))]
    print('training using ' + cur_user)

    def train_data():
        train_images = json_train["user_data"][cur_user]['x']
        train_labels = json_train["user_data"][cur_user]['y']
        if count_by_step:
            num_samples = len(train_images)
            if not num_samples:
                # Nothing to cycle through; avoid a modulo-by-zero.
                return
            # range (not xrange) keeps this runnable on Python 2 and 3.
            for i in range(inner_step * batch_size):
                idx = i % num_samples
                yield train_images[idx], train_labels[idx]
        else:
            for sample in zip(train_images, train_labels):
                yield sample

    def test_data():
        # Iterate the test shard's own user list so lookups into its
        # user_data cannot miss.
        for user in json_test["users"]:
            user_samples = json_test['user_data'][user]
            for sample in zip(user_samples['x'], user_samples['y']):
                yield sample

    return train_data, test_data
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace()) feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())
...@@ -67,13 +36,6 @@ def train_test(train_test_program, train_test_feed, train_test_reader): ...@@ -67,13 +36,6 @@ def train_test(train_test_program, train_test_feed, train_test_reader):
acc_val_mean = numpy.array(acc_set).mean() acc_val_mean = numpy.array(acc_set).mean()
return acc_val_mean return acc_val_mean
def compute_privacy_budget(sample_ratio, epsilon, step, delta):
    """Print and return the accumulated privacy budget.

    The budget is computed as ``2 * epsilon * sqrt(step * sample_ratio)``
    and reported on stdout as ``(E, delta)-DP``.

    Args:
        sample_ratio: Sampling ratio multiplied with ``step`` under the root.
        epsilon: Per-step epsilon scaling the result.
        step: Number of steps taken so far.
        delta: Delta value; only echoed in the printed message.

    Returns:
        The computed budget ``E`` as a float.

    Raises:
        ValueError: If ``step * sample_ratio`` is negative.
    """
    E = 2 * epsilon * math.sqrt(step * sample_ratio)
    print("({0}, {1})-DP".format(E, delta))
    # Hand the value back so callers can log or threshold it.
    return E
epoch_id = 0 epoch_id = 0
step = 0 step = 0
epoch = 3000 epoch = 3000
...@@ -90,13 +52,15 @@ while not trainer.stop(): ...@@ -90,13 +52,15 @@ while not trainer.stop():
if epoch_id > epoch: if epoch_id > epoch:
break break
print("epoch %d start train" % (epoch_id)) print("epoch %d start train" % (epoch_id))
train_data,test_data= data_generater(trainer_id,inner_step=trainer._step,batch_size=64,count_by_step=count_by_step) #train_data,test_data= data_generater(trainer_id,inner_step=trainer._step,batch_size=64,count_by_step=count_by_step)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle(train_data, buf_size=500), paddle.reader.shuffle(paddle_fl.dataset.femnist.train(trainer_id,inner_step=trainer._step,batch_size=64,count_by_step=count_by_step), buf_size=500),
batch_size=64) batch_size=64)
test_reader = paddle.batch( test_reader = paddle.batch(
test_data, batch_size=64) paddle_fl.dataset.femnist.test(trainer_id,inner_step=trainer._step,batch_size=64,count_by_step=count_by_step), batch_size=64)
if count_by_step: if count_by_step:
for step_id, data in enumerate(train_reader()): for step_id, data in enumerate(train_reader()):
acc = trainer.run(feeder.feed(data), fetch=["accuracy_0.tmp_0"]) acc = trainer.run(feeder.feed(data), fetch=["accuracy_0.tmp_0"])
...@@ -116,7 +80,6 @@ while not trainer.stop(): ...@@ -116,7 +80,6 @@ while not trainer.stop():
train_test_feed=feeder) train_test_feed=feeder)
print("Test with epoch %d, accuracy: %s" % (epoch_id, acc_val)) print("Test with epoch %d, accuracy: %s" % (epoch_id, acc_val))
compute_privacy_budget(sample_ratio=0.001, epsilon=0.1, step=step, delta=0.00001)
if trainer_id == 0: if trainer_id == 0:
save_dir = (output_folder + "/epoch_%d") % epoch_id save_dir = (output_folder + "/epoch_%d") % epoch_id
trainer.save_inference_program(output_folder) trainer.save_inference_program(output_folder)
#killall python #killall python
#python fl_master.py python fl_master.py
#sleep 2
python -u fl_server.py >log/server0.log &
sleep 2 sleep 2
python -u fl_scheduler.py >scheduler.log & python -u fl_scheduler.py >scheduler.log &
sleep 2 sleep 2
......
...@@ -29,7 +29,7 @@ def python_version(): ...@@ -29,7 +29,7 @@ def python_version():
max_version, mid_version, min_version = python_version() max_version, mid_version, min_version = python_version()
REQUIRED_PACKAGES = [ REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.1.0','paddlepaddle >= 1.6' 'six >= 1.10.0', 'protobuf >= 3.1.0','paddlepaddle >= 1.6',
] ]
if max_version < 3: if max_version < 3:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册