diff --git a/python/paddle_fl/mpc/examples/mean_normalize_demo/README.md b/python/paddle_fl/mpc/examples/mean_normalize_demo/README.md index 6200d070cf920e91f41de76ab5f2c8600dbe69d6..5d590c6e2e74585201e773f9ff32d6a0c5e866be 100644 --- a/python/paddle_fl/mpc/examples/mean_normalize_demo/README.md +++ b/python/paddle_fl/mpc/examples/mean_normalize_demo/README.md @@ -11,8 +11,9 @@ Create a empty dir for data, and modify `data_path` in `process_data.py`, default dir path is `./data`. Then run the script with command `python prepare.py` to generate random data -for demo. Otherwise generate your own data, move them to `data_path` and modify -corresponding meta info in `prepare.py`. +for demo, which is dumped by numpy as `feature_data.{i}.npy` files located +in `data_path`. Otherwise generate your own data, move it to `data_path`, +name the files the same way, and modify the corresponding meta info in `prepare.py`. Encrypted data files of feature statstics would be generated and saved in `data_path` directory. Different suffix names are used for these files to @@ -55,5 +56,9 @@ import process_data res = process_data.decrypt_data(prepare.data_path + 'result', (2, prepare.feat_width, )) ``` -Also, `verify.py` could be used to calculate error between direct plaintext -numpy calculation and mpc mean normalize. +Or use `decrypt_and_rescale.py` to decrypt the result, rescale the feature data +saved in `feature_data.{i}.npy`, and dump the normalized data to +`normalized_data.{i}.npy`, which is located in `data_path`. + +Also, `verify.py` can be used to calculate the error of `f_range` and `f_mean` +between the direct plaintext numpy calculation and the MPC mean normalization. 
diff --git a/python/paddle_fl/mpc/examples/mean_normalize_demo/decrypt_and_rescale.py b/python/paddle_fl/mpc/examples/mean_normalize_demo/decrypt_and_rescale.py
new file mode 100644
index 0000000000000000000000000000000000000000..e257a0d7c7c41bb1e611ab3bb5e5a73ae00ebbc5
--- /dev/null
+++ b/python/paddle_fl/mpc/examples/mean_normalize_demo/decrypt_and_rescale.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Decrypt and rescale for mean normalize demo.
+
+Usage: python decrypt_and_rescale.py <party_id>
+
+Decrypts the global feature statistics (row 0: f_range, row 1: f_mean) and
+uses them to rescale this party's local `feature_data.<party_id>.npy`,
+saving the result as `normalized_data.<party_id>.npy` under `data_path`.
+"""
+import sys
+import numpy as np
+import process_data
+import prepare
+
+# Fail fast with a usage hint instead of a bare IndexError on sys.argv[1].
+if len(sys.argv) < 2:
+    sys.exit('usage: python decrypt_and_rescale.py <party_id>')
+
+party = sys.argv[1]
+data_path = prepare.data_path
+
+# 0 for f_range, 1 for f_mean
+# use decrypted global f_range and f_mean to rescale local feature data
+res = process_data.decrypt_data(data_path + 'result', (2, prepare.feat_width, ))
+f_range, f_mean = res[0], res[1]
+
+# Named `features` rather than `input` to avoid shadowing the builtin.
+features = np.load(data_path + 'feature_data.' + party + '.npy')
+
+# NOTE(review): a zero entry in f_range (constant feature) would make this
+# division yield inf/nan -- confirm whether such features need special handling.
+normalized = (features - f_mean) / f_range
+
+# np.save appends the `.npy` suffix to the file name itself.
+np.save(data_path + 'normalized_data.' + party, normalized)