diff --git a/python/paddle_fl/mpc/examples/mnist_demo/process_data.py b/python/paddle_fl/mpc/examples/mnist_demo/process_data.py
index c85d63a7d0ee8970017796b5df7b0031c2c858dc..dac24d087823bd48d15c670be1f7b18029e4820e 100644
--- a/python/paddle_fl/mpc/examples/mnist_demo/process_data.py
+++ b/python/paddle_fl/mpc/examples/mnist_demo/process_data.py
@@ -77,10 +77,12 @@ def load_decrypt_data(filepath, shape):
         p = aby3.reconstruct(np.array(instance))
         print(p)
 
-def decrypt_data_to_file(filepath, shape, decrypted_filepath):
+def decrypt_data_to_file(filepath, shape, decrypted_file):
     """
     load the encrypted data and reconstruct to a file
     """
+    if os.path.exists(decrypted_file):
+        os.remove(decrypted_file)
     part_readers = []
     for id in six.moves.range(3):
         part_readers.append(aby3.load_aby3_shares(filepath, id=id, shape=shape))
@@ -88,6 +90,6 @@ def decrypt_data_to_file(filepath, shape, decrypted_filepath):
 
     for instance in aby3_share_reader():
         p = aby3.reconstruct(np.array(instance))
-        with open(decrypted_filepath, 'a+') as f:
+        with open(decrypted_file, 'a+') as f:
             for i in p:
                 f.write(str(i) + '\n')
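For reference, a minimal usage sketch of the `decrypt_data_to_file()` helper changed above. The share prefix, shape, and output path are placeholders, not values taken from this patch; run it from the demo directory so that `process_data` can be imported.

```python
# Hypothetical usage sketch: reconstruct ABY3 shares into a plain-text file,
# one value per line. Replace the prefix and shape with the ones the three
# `.part` share files were actually saved with.
import process_data

process_data.decrypt_data_to_file("/tmp/some_share_prefix", (1,), "/tmp/decrypted_output")
```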
diff --git a/python/paddle_fl/mpc/examples/uci_demo/README.md b/python/paddle_fl/mpc/examples/uci_demo/README.md
index 153af59090600b9ff39c263ee7e117344668e667..32ab7ad5c25ad085874b23a6f34adfa47e3b0710 100644
--- a/python/paddle_fl/mpc/examples/uci_demo/README.md
+++ b/python/paddle_fl/mpc/examples/uci_demo/README.md
@@ -19,6 +19,15 @@ Encrypted data files of feature and label would be generated and saved in `/tmp` directory.
 
 #### (2). Launch Demo with A Shell Script
 
+You should set the following environment variables first:
+
+```bash
+export PYTHON=/your/python
+export PATH_TO_REDIS_BIN=/path/to/redis_bin
+export LOCALHOST=/your/localhost
+export REDIS_PORT=/your/redis/port
+```
+
 Launch demo with the `run_standalone.sh` script. The concrete command is:
 
 ```bash
@@ -29,8 +38,25 @@ The loss with cypher text format will be displayed on screen while training. At
 
 Besides, predictions would be made in this demo once training is finished. The predictions with cypher text format would also be save in `/tmp` directory.
 
+#### (3). Decrypt Data
+
 Finally, using `load_decrypt_data()` in `process_data.py` script, this demo would decrypt and print the loss and predictions, which can be compared with related results of Paddle plain text model.
 
+For example, users can write the following code into a Python script named `decrypt_save.py`, and then run it with the command `python decrypt_save.py decrypt_loss_file decrypt_prediction_file`. The decrypted loss and prediction results would be saved into the two files respectively.
+
+```python
+import sys
+
+import process_data
+
+
+decrypt_loss_file = sys.argv[1]
+decrypt_prediction_file = sys.argv[2]
+BATCH_SIZE = 10
+process_data.load_decrypt_data("/tmp/uci_loss", (1,), decrypt_loss_file)
+process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,), decrypt_prediction_file)
+```
+
 **Note** that remember to delete the loss and prediction files in `/tmp` directory generated in last running, in case of any influence on the decrypted results of current running. For simplifying users operations, we provide the following commands in `run_standalone.sh`, which can delete the files mentioned above when running this script.
 
 ```bash
@@ -70,18 +96,6 @@ Each computation party makes the following modifications on `uci_housing_demo.py
     pfl_mpc.init("aby3", int(role), "localhost", server, int(port))
     ```
 
-* Comment Out Codes for Single Machine Running
-
-    Comment out the following codes which are used when running on single machine.
-
-    ```python
-    import process_data
-    print("uci_loss:")
-    process_data.load_decrypt_data("/tmp/uci_loss", (1,))
-    print("prediction:")
-    process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,))
-    ```
-
 #### (4). Launch Demo on Each Party
 
 **Note** that Redis service is necessary for demo running. Remember to clear the cache of Redis server before launching demo on each computation party, in order to avoid any negative influences caused by the cached records in Redis. The following command can be used for clear Redis, where REDIS_BIN is the executable binary of redis-cli, SERVER and PORT represent the IP and port of Redis server respectively.
 
@@ -106,20 +120,19 @@ Similarly, training loss with cypher text format would be printed on the screen
 
 Each computation party sends `uci_loss.part` and `uci_prediction.part` files in `/tmp` directory to the `/tmp` directory of data owner. Data owner decrypts and gets the plain text of loss and predictions with ` load_decrypt_data()` in `process_data.py`.
 
-For example, the following code can be written into a python script to decrypt and print training loss.
+For example, the following code can be written into a Python script to decrypt and save training loss and predictions.
 
 ```python
+import sys
+
 import process_data
-print("uci_loss:")
-process_data.load_decrypt_data("/tmp/uci_loss", (1,))
-```
-And the following code can be written into a python script to decrypt and print predictions.
-```python
-import process_data
-print("prediction:")
-process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,))
+decrypt_loss_file = sys.argv[1]
+decrypt_prediction_file = sys.argv[2]
+BATCH_SIZE = 10
+process_data.load_decrypt_data("/tmp/uci_loss", (1,), decrypt_loss_file)
+process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,), decrypt_prediction_file)
 ```
 
 ### 3. Convergence of paddle_fl.mpc vs paddle
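The `decrypt_save.py` snippet in the README above assumes both output paths are always passed on the command line. A slightly more defensive variant of the same steps (a sketch, not part of this patch; the usage message is illustrative) could check the arguments first:

```python
# Sketch: same decryption steps as the README example, with a basic argument check.
import sys

import process_data

if len(sys.argv) != 3:
    sys.exit("usage: python decrypt_save.py <decrypt_loss_file> <decrypt_prediction_file>")

decrypt_loss_file = sys.argv[1]
decrypt_prediction_file = sys.argv[2]
BATCH_SIZE = 10
process_data.load_decrypt_data("/tmp/uci_loss", (1,), decrypt_loss_file)
process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,), decrypt_prediction_file)
```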
diff --git a/python/paddle_fl/mpc/examples/uci_demo/README_CN.md b/python/paddle_fl/mpc/examples/uci_demo/README_CN.md
index 66d0163dd6746b0b93571645fedc5fa0cbfd18d0..76ed4fe842243668d1902358291990d60ecd7206 100644
--- a/python/paddle_fl/mpc/examples/uci_demo/README_CN.md
+++ b/python/paddle_fl/mpc/examples/uci_demo/README_CN.md
@@ -19,7 +19,16 @@ process_data.generate_encrypted_data()
 
 #### 2. 使用shell脚本启动demo
 
-使用`run_standalone.sh`脚本,启动并运行demo,命令如下:
+运行demo之前,需设置以下环境变量:
+
+```bash
+export PYTHON=/your/python
+export PATH_TO_REDIS_BIN=/path/to/redis_bin
+export LOCALHOST=/your/localhost
+export REDIS_PORT=/your/redis/port
+```
+
+然后使用`run_standalone.sh`脚本,启动并运行demo,命令如下:
 
 ```bash
 bash run_standalone.sh uci_housing_demo.py
@@ -29,7 +38,23 @@ bash run_standalone.sh uci_housing_demo.py
 
 此外,在完成训练之后,demo会继续进行预测,并将预测密文结果也保存到/tmp目录下的文件中。
 
+#### 3. 解密数据
+
 最后,demo会使用`process_data.py`脚本中的`load_decrypt_data()`,恢复并打印出明文的loss数据和prediction结果,用以和明文Paddle模型结果进行对比。
 
+例如,将下面的内容写到一个decrypt_save.py脚本中,然后运行`python decrypt_save.py decrypt_loss_file decrypt_prediction_file`,即可将明文loss数据和预测结果分别保存到这两个文件中。
+
+```python
+import sys
+
+import process_data
+
+
+decrypt_loss_file = sys.argv[1]
+decrypt_prediction_file = sys.argv[2]
+BATCH_SIZE = 10
+process_data.load_decrypt_data("/tmp/uci_loss", (1,), decrypt_loss_file)
+process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,), decrypt_prediction_file)
+```
 
 **注意**:再次启动运行demo之前,请先将上次在`/tmp`保存的loss和prediction文件删除,以免影响本次密文数据的恢复结果。为了简化用户操作,我们在`run_standalone.sh`脚本中加入了如下的内容,可以在执行脚本时删除上次数据。
 
@@ -72,17 +97,6 @@ fi
     pfl_mpc.init("aby3", int(role), "localhost", server, int(port))
     ```
 
-* 注释掉单机运行所需代码
-
-    将脚本中如下代码注释掉,这部分代码用在单机运行case下。
-
-    ```python
-    import process_data
-    print("uci_loss:")
-    process_data.load_decrypt_data("/tmp/uci_loss", (1,))
-    print("prediction:")
-    process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,))
-    ```
 
 #### 4. 各计算party启动demo
 
@@ -108,20 +122,19 @@ $PYTHON_EXECUTABLE uci_housing_demo.py $PARTY_ID $SERVER $PORT
 
 各计算party将`/tmp`目录下的`uci_loss.part`和`uci_prediction.part`文件发送到数据方的/tmp目录下。数据方使用process_data.py脚本中的load_decrypt_data()解密恢复出loss数据和prediction数据。
 
-比如,使用如下内容的python脚本,打印解密的loss数据:
+例如,将下面的内容写到一个decrypt_save.py脚本中,然后运行`python decrypt_save.py decrypt_loss_file decrypt_prediction_file`,即可将明文loss数据和预测结果分别保存到这两个文件中。
 
 ```python
+import sys
+
 import process_data
-print("uci_loss:")
-process_data.load_decrypt_data("/tmp/uci_loss", (1,))
-```
-使用如下内容的python脚本,打印解密的prediction数据:
-```python
-import process_data
-print("prediction:")
-process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,))
+decrypt_loss_file = sys.argv[1]
+decrypt_prediction_file = sys.argv[2]
+BATCH_SIZE = 10
+process_data.load_decrypt_data("/tmp/uci_loss", (1,), decrypt_loss_file)
+process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE,), decrypt_prediction_file)
 ```
 
 ### 三. 单机精度测试
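Both READMEs suggest comparing the decrypted results with a plain-text Paddle run. A small illustrative sketch (not part of this patch) for inspecting a decrypted loss file, which `load_decrypt_data()` writes as one value per line:

```python
# Illustrative only: summarize a decrypted loss file produced by
# process_data.load_decrypt_data(); the file contains one float per line.
import sys

import numpy as np

losses = np.atleast_1d(np.loadtxt(sys.argv[1]))  # e.g. the decrypt_loss_file written above
print("num records: {}".format(losses.size))
print("first loss:  {}".format(losses[0]))
print("last loss:   {}".format(losses[-1]))
```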
+""" +import sys + +import process_data + + +decrypt_loss_file=sys.argv[1] +decrypt_prediction_file=sys.argv[2] +BATCH_SIZE=10 +process_data.load_decrypt_data("/tmp/uci_loss", (1, ), decrypt_loss_file) +print("uci_loss done") +process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ), decrypt_prediction_file) +print("prediction done") diff --git a/python/paddle_fl/mpc/examples/uci_demo/prepare.py b/python/paddle_fl/mpc/examples/uci_demo/prepare.py new file mode 100644 index 0000000000000000000000000000000000000000..b9c007416bedb84d50df4e12f196371a73a3c881 --- /dev/null +++ b/python/paddle_fl/mpc/examples/uci_demo/prepare.py @@ -0,0 +1,20 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Prepare data for UCI Housing. +""" +import process_data + + +process_data.generate_encrypted_data() diff --git a/python/paddle_fl/mpc/examples/uci_demo/process_data.py b/python/paddle_fl/mpc/examples/uci_demo/process_data.py index 99d172efc90fe1317f056da0e5340bbd74a4a61b..ee6a23dce20e203dcdec9cb32123ae2fc8baee92 100644 --- a/python/paddle_fl/mpc/examples/uci_demo/process_data.py +++ b/python/paddle_fl/mpc/examples/uci_demo/process_data.py @@ -17,6 +17,7 @@ Process data for UCI Housing. import numpy as np import paddle import six +import os from paddle_fl.mpc.data_utils import aby3 sample_reader = paddle.dataset.uci_housing.train() @@ -45,10 +46,12 @@ def generate_encrypted_data(): aby3.save_aby3_shares(encrypted_housing_labels, "/tmp/house_label") -def load_decrypt_data(filepath, shape): +def load_decrypt_data(filepath, shape, decrypted_file): """ load the encrypted data and reconstruct """ + if os.path.exists(decrypted_file): + os.remove(decrypted_file) part_readers = [] for id in six.moves.range(3): part_readers.append( @@ -59,4 +62,6 @@ def load_decrypt_data(filepath, shape): for instance in aby3_share_reader(): p = aby3.reconstruct(np.array(instance)) - print(p) + with open(decrypted_file, 'a+') as f: + for i in p: + f.write(str(i) + '\n') diff --git a/python/paddle_fl/mpc/examples/uci_demo/run_standalone.sh b/python/paddle_fl/mpc/examples/uci_demo/run_standalone.sh index ce5993e086e028021ada5b5942efde9432b55305..6b258a92629f15df2704051b64e2b4aa303a60e8 100755 --- a/python/paddle_fl/mpc/examples/uci_demo/run_standalone.sh +++ b/python/paddle_fl/mpc/examples/uci_demo/run_standalone.sh @@ -32,12 +32,13 @@ # # modify the following vars according to your environment -PYTHON="python" -REDIS_HOME="path_to_redis_bin" -SERVER="localhost" -PORT=9937 +PYTHON=${PYTHON} +REDIS_HOME=${PATH_TO_REDIS_BIN} +SERVER=${LOCALHOST} +PORT=${REDIS_PORT} +echo "redis home in ${REDIS_HOME}, server is ${SERVER}, port is ${PORT}" -function usage() { +function usage(){ echo 'run_standalone.sh SCRIPT_NAME [ARG...]' exit 0 } @@ -64,14 +65,25 @@ $REDIS_BIN -h $SERVER -p $PORT flushall # remove temp data generated in last time LOSS_FILE="/tmp/uci_loss.*" PRED_FILE="/tmp/uci_prediction.*" -if [ "$LOSS_FILE" ]; then +ls ${LOSS_FILE} +if [ $? 
diff --git a/python/paddle_fl/mpc/examples/uci_demo/run_standalone.sh b/python/paddle_fl/mpc/examples/uci_demo/run_standalone.sh
index ce5993e086e028021ada5b5942efde9432b55305..6b258a92629f15df2704051b64e2b4aa303a60e8 100755
--- a/python/paddle_fl/mpc/examples/uci_demo/run_standalone.sh
+++ b/python/paddle_fl/mpc/examples/uci_demo/run_standalone.sh
@@ -32,12 +32,13 @@
 #
 # modify the following vars according to your environment
-PYTHON="python"
-REDIS_HOME="path_to_redis_bin"
-SERVER="localhost"
-PORT=9937
+PYTHON=${PYTHON}
+REDIS_HOME=${PATH_TO_REDIS_BIN}
+SERVER=${LOCALHOST}
+PORT=${REDIS_PORT}
+echo "redis home in ${REDIS_HOME}, server is ${SERVER}, port is ${PORT}"
 
-function usage() {
+function usage(){
     echo 'run_standalone.sh SCRIPT_NAME [ARG...]'
     exit 0
 }
@@ -64,14 +65,25 @@ $REDIS_BIN -h $SERVER -p $PORT flushall
 
 # remove temp data generated in last time
 LOSS_FILE="/tmp/uci_loss.*"
 PRED_FILE="/tmp/uci_prediction.*"
-if [ "$LOSS_FILE" ]; then
+ls ${LOSS_FILE}
+if [ $? -eq 0 ]; then
     rm -rf $LOSS_FILE
 fi
 
-if [ "$PRED_FILE" ]; then
+ls ${PRED_FILE}
+if [ $? -eq 0 ]; then
     rm -rf $PRED_FILE
 fi
 
+TRAINING_FILE="/tmp/house_feature.part*"
+ls ${TRAINING_FILE}
+if [ $? -ne 0 ]; then
+    echo "There is no data in /tmp, please prepare data with \"python prepare.py\" first"
+    exit 1
+else
+    echo "There are data for uci:"
+    echo "`ls ${TRAINING_FILE}`"
+fi
 
 # kick off script with roles of 1 and 2, and redirect output to /dev/null
 for role in {1..2}; do
diff --git a/python/paddle_fl/mpc/examples/uci_demo/uci_housing_demo.py b/python/paddle_fl/mpc/examples/uci_demo/uci_housing_demo.py
index 43913d8e07788d64f970b4cf4b2ee70b58f2819e..1f3e4a1b959d04ec695bbf50e7942bb9ae1e1783 100644
--- a/python/paddle_fl/mpc/examples/uci_demo/uci_housing_demo.py
+++ b/python/paddle_fl/mpc/examples/uci_demo/uci_housing_demo.py
@@ -61,8 +61,8 @@ exe = fluid.Executor(place)
 exe.run(fluid.default_startup_program())
 
 epoch_num = 20
-start_time = time.time()
 for epoch_id in range(epoch_num):
+    start_time = time.time()
     step = 0
 
     # Method 1: feed data directly
@@ -71,17 +71,18 @@ for epoch_id in range(epoch_num):
 
     # Method 2: feed data via loader
     for sample in loader():
+        step_start = time.time()
         mpc_loss = exe.run(feed=sample, fetch_list=[avg_loss])
+        step_end = time.time()
         if step % 50 == 0:
-            print('Epoch={}, Step={}, Loss={}'.format(epoch_id, step,
-                                                       mpc_loss))
+            print('Epoch={}, Step={}, batch_cost={:.4f} s, Loss={}'.format(epoch_id, step,
+                  (step_end - step_start), mpc_loss))
             with open(loss_file, 'ab') as f:
                 f.write(np.array(mpc_loss).tostring())
-        step += 1
-
-end_time = time.time()
-print('Mpc Training of Epoch={} Batch_size={}, cost time in seconds:{}'
+        step += 1
+    end_time = time.time()
+    print('Mpc Training of Epoch={} Batch_size={}, epoch_cost={:.4f} s'
       .format(epoch_num, BATCH_SIZE, (end_time - start_time)))
 
 prediction_file = "/tmp/uci_prediction.part{}".format(role)
@@ -92,9 +93,3 @@ for sample in loader():
         with open(prediction_file, 'ab') as f:
             f.write(np.array(prediction).tostring())
         break
-
-import process_data
-print("uci_loss:")
-process_data.load_decrypt_data("/tmp/uci_loss", (1, ))
-print("prediction:")
-process_data.load_decrypt_data("/tmp/uci_prediction", (BATCH_SIZE, ))
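The `uci_housing_demo.py` changes above time each batch and each epoch separately. A generic sketch of that measurement pattern (illustrative only, independent of PaddleFL; the function and its arguments are hypothetical):

```python
# Illustrative sketch of per-batch and per-epoch timing around a training step.
import time


def run_epoch(batches, run_step):
    """Run one epoch, printing the cost of selected batches and of the whole epoch."""
    epoch_start = time.time()
    for step, batch in enumerate(batches):
        step_start = time.time()
        result = run_step(batch)
        step_end = time.time()
        if step % 50 == 0:
            print('Step={}, batch_cost={:.4f} s, result={}'.format(
                step, step_end - step_start, result))
    print('epoch_cost={:.4f} s'.format(time.time() - epoch_start))


if __name__ == "__main__":
    # Trivial usage example with a dummy "training step".
    run_epoch(range(3), lambda b: b * b)
```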