From 30357a8d427a2be1a6c4fdb840d7a870a43da44a Mon Sep 17 00:00:00 2001 From: jhjiangcs Date: Thu, 27 Aug 2020 12:13:01 +0000 Subject: [PATCH] add lenet network demo, improve mnist demo. --- .../examples/lenet_with_mnist/train_lenet.py | 48 ++++-- .../examples/logistic_with_mnist/README.md | 2 +- .../examples/logistic_with_mnist/README_CN.md | 2 +- .../logistic_with_mnist/train_fc_softmax.py | 149 ++++++++++++++++++ 4 files changed, 182 insertions(+), 19 deletions(-) create mode 100644 python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_softmax.py diff --git a/python/paddle_fl/mpc/examples/lenet_with_mnist/train_lenet.py b/python/paddle_fl/mpc/examples/lenet_with_mnist/train_lenet.py index 27fba5b..70f240e 100644 --- a/python/paddle_fl/mpc/examples/lenet_with_mnist/train_lenet.py +++ b/python/paddle_fl/mpc/examples/lenet_with_mnist/train_lenet.py @@ -44,23 +44,37 @@ epoch_num = 5 x = pfl_mpc.data(name='x', shape=[BATCH_SIZE, 1, 28, 28], dtype='int64') y = pfl_mpc.data(name='y', shape=[BATCH_SIZE, 10], dtype='int64') -# lenet-3 network -#conv = pfl_mpc.layers.conv2d(input=x, num_filters=16, filter_size=5, act='relu') -#pool = pfl_mpc.layers.pool2d(input=conv, pool_size=2, pool_stride=2) -#fc_1 = pfl_mpc.layers.fc(input=pool, size=100, act='relu') - -# lenet-5 network -conv_1 = pfl_mpc.layers.conv2d(input=x, num_filters=16, filter_size=5, act='relu') -pool_1 = pfl_mpc.layers.pool2d(input=conv_1, pool_size=2, pool_stride=2) -conv_2 = pfl_mpc.layers.conv2d(input=pool_1, num_filters=16, filter_size=5, act='relu') -pool_2 = pfl_mpc.layers.pool2d(input=conv_2, pool_size=2, pool_stride=2) -fc_1 = pfl_mpc.layers.fc(input=pool_2, size=100, act='relu') - -fc_out = pfl_mpc.layers.fc(input=fc_1, size=10) -cost, softmax = pfl_mpc.layers.softmax_with_cross_entropy(logits=fc_out, - label=y, - soft_label=True, - return_softmax=True) + +class Model(object): + def __init__(self): + pass + + def lenet3(self): + conv = pfl_mpc.layers.conv2d(input=x, num_filters=16, filter_size=5, 
act='relu') + pool = pfl_mpc.layers.pool2d(input=conv, pool_size=2, pool_stride=2) + fc_1 = pfl_mpc.layers.fc(input=pool, size=100, act='relu') + fc_out = pfl_mpc.layers.fc(input=fc_1, size=10) + cost, softmax = pfl_mpc.layers.softmax_with_cross_entropy(logits=fc_out, + label=y, + soft_label=True, + return_softmax=True) + return cost, softmax + + def lenet5(self): + conv_1 = pfl_mpc.layers.conv2d(input=x, num_filters=16, filter_size=5, act='relu') + pool_1 = pfl_mpc.layers.pool2d(input=conv_1, pool_size=2, pool_stride=2) + conv_2 = pfl_mpc.layers.conv2d(input=pool_1, num_filters=16, filter_size=5, act='relu') + pool_2 = pfl_mpc.layers.pool2d(input=conv_2, pool_size=2, pool_stride=2) + fc_1 = pfl_mpc.layers.fc(input=pool_2, size=100, act='relu') + fc_out = pfl_mpc.layers.fc(input=fc_1, size=10) + cost, softmax = pfl_mpc.layers.softmax_with_cross_entropy(logits=fc_out, + label=y, + soft_label=True, + return_softmax=True) + return cost, softmax + +model = Model() +cost, softmax = model.lenet5() infer_program = fluid.default_main_program().clone(for_test=False) diff --git a/python/paddle_fl/mpc/examples/logistic_with_mnist/README.md b/python/paddle_fl/mpc/examples/logistic_with_mnist/README.md index c28c86f..dae0d4e 100644 --- a/python/paddle_fl/mpc/examples/logistic_with_mnist/README.md +++ b/python/paddle_fl/mpc/examples/logistic_with_mnist/README.md @@ -8,7 +8,7 @@ This document introduces how to run MNIST demo based on Paddle-MPC, which has tw #### (1). Prepare Data -Generate encrypted training and testing data utilizing `generate_encrypted_data()` and `generate_encrypted_test_data()` in `process_data.py` script. Users can run the script with command `python process_data.py` to generate encrypted feature and label in given directory, e.g., `./mpc_data/`. Users can specify `class_num` (2 or 10) to determine the encrypted data is for `fc_sigmoid`(two classes) or `lenet`(10 classes) network. 
Different suffix names are used for these files to indicate the ownership of different computation parties. For instance, a file named `mnist2_feature.part0` means it is a feature file of party 0. +Generate encrypted training and testing data utilizing `generate_encrypted_data()` and `generate_encrypted_test_data()` in `process_data.py` script. Users can run the script with command `python process_data.py` to generate encrypted feature and label in given directory, e.g., `./mpc_data/`. Users can specify `class_num` (2 or 10) to determine whether the encrypted data is for `logistic_fc_sigmoid`(two classes) or `lenet` and `logistic_fc_softmax`(10 classes) network. Different suffix names are used for these files to indicate the ownership of different computation parties. For instance, a file named `mnist2_feature.part0` means it is a feature file of party 0. #### (2). Launch Demo with A Shell Script diff --git a/python/paddle_fl/mpc/examples/logistic_with_mnist/README_CN.md b/python/paddle_fl/mpc/examples/logistic_with_mnist/README_CN.md index 38330b4..33ebcfc 100644 --- a/python/paddle_fl/mpc/examples/logistic_with_mnist/README_CN.md +++ b/python/paddle_fl/mpc/examples/logistic_with_mnist/README_CN.md @@ -8,7 +8,7 @@ #### 1. 准备数据 -使用`process_data.py`脚本中的`generate_encrypted_data()`和`generate_encrypted_test_data()`产生加密训练数据和测试数据,用户可以直接运行脚本`python process_data.py`在指定的目录下(比如`./mpc_data/`)产生加密特征和标签。用户可以通过参数`class_num`指定label的类别数目,从而产生适用于`fc_sigmoid`(二分类)和`lenet`(十分类)网络的加密数据。在指定目录下生成对应于3个计算party的feature和label的加密数据文件,以后缀名区分属于不同party的数据。比如,`mnist2_feature.part0`表示属于party0的feature数据。 +使用`process_data.py`脚本中的`generate_encrypted_data()`和`generate_encrypted_test_data()`产生加密训练数据和测试数据,用户可以直接运行脚本`python process_data.py`在指定的目录下(比如`./mpc_data/`)产生加密特征和标签。用户可以通过参数`class_num`指定label的类别数目,从而产生适用于`logistic_fc_sigmoid`(二分类)或`lenet`和`logistic_fc_softmax`(十分类)网络的加密数据。在指定目录下生成对应于3个计算party的feature和label的加密数据文件,以后缀名区分属于不同party的数据。比如,`mnist2_feature.part0`表示属于party0的feature数据。 #### 2. 
使用shell脚本启动demo diff --git a/python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_softmax.py b/python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_softmax.py new file mode 100644 index 0000000..457ce96 --- /dev/null +++ b/python/paddle_fl/mpc/examples/logistic_with_mnist/train_fc_softmax.py @@ -0,0 +1,149 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +MNIST Softmax Regression Demo (single FC layer) +""" + +import sys +import os +import numpy as np +import time +import logging +import math + +import paddle +import paddle.fluid as fluid +import paddle.fluid.profiler as profiler +import paddle_fl.mpc as pfl_mpc +import paddle_fl.mpc.data_utils.aby3 as aby3 + +logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger("fluid") +logger.setLevel(logging.INFO) + +role, server, port = sys.argv[1], sys.argv[2], sys.argv[3] +# modify host(localhost). 
+pfl_mpc.init("aby3", int(role), "localhost", server, int(port)) +role = int(role) + +# data preprocessing +BATCH_SIZE = 128 +epoch_num = 5 + +x = pfl_mpc.data(name='x', shape=[BATCH_SIZE, 1, 28, 28], dtype='int64') +y = pfl_mpc.data(name='y', shape=[BATCH_SIZE, 10], dtype='int64') + +fc_out = pfl_mpc.layers.fc(input=x, size=10) +cost, softmax = pfl_mpc.layers.softmax_with_cross_entropy(logits=fc_out, + label=y, + soft_label=True, + return_softmax=True) + +infer_program = fluid.default_main_program().clone(for_test=False) + +avg_loss = pfl_mpc.layers.mean(cost) +optimizer = pfl_mpc.optimizer.SGD(learning_rate=0.1) +optimizer.minimize(avg_loss) + +# prepare train and test reader +mpc_data_dir = "./mpc_data/" +if not os.path.exists(mpc_data_dir): + raise ValueError("mpc_data_dir is not found. Please prepare encrypted data.") + +# train_reader +feature_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_feature", id=role, shape=(1, 28, 28)) +label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_label", id=role, shape=(10,)) +batch_feature = aby3.batch(feature_reader, BATCH_SIZE, drop_last=True) +batch_label = aby3.batch(label_reader, BATCH_SIZE, drop_last=True) + +# test_reader +test_feature_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_test_feature", id=role, shape=(1, 28, 28)) +test_label_reader = aby3.load_aby3_shares(mpc_data_dir + "mnist10_test_label", id=role, shape=(10,)) +test_batch_feature = aby3.batch(test_feature_reader, BATCH_SIZE, drop_last=True) +test_batch_label = aby3.batch(test_label_reader, BATCH_SIZE, drop_last=True) + +place = fluid.CPUPlace() + +# async data loader +loader = fluid.io.DataLoader.from_generator(feed_list=[x, y], capacity=BATCH_SIZE) +batch_sample = paddle.reader.compose(batch_feature, batch_label) +loader.set_batch_generator(batch_sample, places=place) + +test_loader = fluid.io.DataLoader.from_generator(feed_list=[x, y], capacity=BATCH_SIZE) +test_batch_sample = paddle.reader.compose(test_batch_feature, 
test_batch_label) +test_loader.set_batch_generator(test_batch_sample, places=place) + +# infer +def infer(): + """ + Run infer_program over test_loader and append each batch's raw softmax bytes to this party's prediction file. + """ + mpc_infer_data_dir = "./mpc_infer_data/" + if not os.path.exists(mpc_infer_data_dir): + os.mkdir(mpc_infer_data_dir) + prediction_file = mpc_infer_data_dir + "mnist_debug_prediction" + prediction_file_part = prediction_file + ".part{}".format(role) + + if os.path.exists(prediction_file_part): + os.remove(prediction_file_part) + + step = 0 + start_time = time.time() + for sample in test_loader(): + step += 1 + prediction = exe.run(program=infer_program, feed=sample, fetch_list=[softmax]) + with open(prediction_file_part, 'ab') as f: + f.write(np.array(prediction).tobytes()) + if step % 10 == 0: + end_time = time.time() + logger.info('MPC infer of step={}, cost time in seconds:{}'.format(step, (end_time - start_time))) + + end_time = time.time() + logger.info('MPC infer time in seconds:{}'.format((end_time - start_time))) + +# train +exe = fluid.Executor(place) +exe.run(fluid.default_startup_program()) + +mpc_model_basedir = "./mpc_model/" + +step = 0 +start_time = time.time() +logger.info('MPC training start...') +for epoch_id in range(epoch_num): + for sample in loader(): + step += 1 + results = exe.run(feed=sample, fetch_list=[softmax]) + if step % 100 == 0: + end_time = time.time() + logger.info('MPC training of epoch_id={} step={}, cost time in seconds:{}' + .format(epoch_id, step, (end_time - start_time))) + + # For each epoch: infer or save infer program + #infer() + mpc_model_dir = mpc_model_basedir + "epoch{}/party{}".format(epoch_id, role) + fluid.io.save_inference_model(dirname=mpc_model_dir, + feeded_var_names=["x", "y"], + target_vars=[softmax], + executor=exe, + main_program=infer_program, + model_filename="__model__") + +end_time = time.time() +logger.info('MPC training of epoch_num={} batch_size={}, cost time in seconds:{}' + .format(epoch_num, BATCH_SIZE, (end_time - start_time))) + +# infer +infer() + -- GitLab