Commit 63ae7e62 authored by He, Kai

add mpc operator max, move mean_normalize to ml.py

Parent 57c82ab5
@@ -319,17 +319,24 @@ public:
         auto a_tuple = from_tensor(in);
         auto a_ = std::get<0>(a_tuple).get();
 
+        auto b_tuple = from_tensor<BoolTensor>(pos_info);
+        auto b_ = std::get<0>(b_tuple).get();
+
         auto out_tuple = from_tensor(out);
         auto out_ = std::get<0>(out_tuple).get();
 
-        if (pos_info) {
-            auto b_tuple = from_tensor<BoolTensor>(pos_info);
-            auto b_ = std::get<0>(b_tuple).get();
-
-            a_->max_pooling(out_, b_);
-        } else {
-            a_->max_pooling(out_, nullptr);
-        }
+        a_->max_pooling(out_, b_);
+    }
+
+    void max(const Tensor* in, Tensor* out) override {
+
+        auto a_tuple = from_tensor(in);
+        auto a_ = std::get<0>(a_tuple).get();
+
+        auto out_tuple = from_tensor(out);
+        auto out_ = std::get<0>(out_tuple).get();
+
+        a_->max_pooling(out_, nullptr);
     }
 
     void inverse_square_root(const Tensor* in, Tensor* out) override {
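Note: the pos_info null-check that used to live inside `max_pooling` is replaced by a dedicated `max` entry point, which reuses the same share-level kernel (`a_->max_pooling`) with a null position tensor, so no argmax position tensor is built. A plaintext sketch of that dispatch (numpy stand-in; function names here are illustrative, not the real API):

```python
import numpy as np

def max_pooling(x, want_positions):
    """Plaintext stand-in for the share-level max_pooling kernel."""
    out = np.max(x, axis=0, keepdims=True)
    if want_positions:
        # marks where each column attains its max (ties included)
        pos = (x == out).astype(np.int64)
        return out, pos
    return out, None

def mpc_max(x):
    # the new `max` operator: same kernel, position info skipped (nullptr path)
    out, _ = max_pooling(x, want_positions=False)
    return out

print(mpc_max(np.array([[3, 1], [2, 5]])))  # [[3 5]]
```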
@@ -82,6 +82,10 @@ public:
     // for filter in other shape, reshape input first
     virtual void max_pooling(const Tensor* in, Tensor* out, Tensor* pos_info) {}
 
+    // column wise max
+    // in shape [n, ...], out shape [1, ...]
+    virtual void max(const Tensor* in, Tensor* out) {}
+
     virtual void inverse_square_root(const Tensor* in, Tensor* out) = 0;
 
     virtual void predicts_to_indices(const Tensor* in,
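The interface comment documents the shape contract for `max`: a column-wise reduction over the leading axis. In plaintext terms (illustration only):

```python
import numpy as np

x = np.array([[3, 1, 4],
              [1, 5, 9],
              [2, 6, 5]])               # in shape [n, ...] = [3, 3]
out = np.max(x, axis=0, keepdims=True)  # out shape [1, ...] = [1, 3]
print(out)                              # [[3 6 9]]
```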
@@ -54,10 +54,10 @@ class MpcMeanNormalizationKernel : public MpcOpKernel<T> {
             ->mpc_operators()->neg(min, &neg_min);
 
         mpc::MpcInstance::mpc_instance()->mpc_protocol()
-            ->mpc_operators()->max_pooling(&neg_min, &neg_min_global, nullptr);
+            ->mpc_operators()->max(&neg_min, &neg_min_global);
 
         mpc::MpcInstance::mpc_instance()->mpc_protocol()
-            ->mpc_operators()->max_pooling(max, &max_global, nullptr);
+            ->mpc_operators()->max(max, &max_global);
 
         range->mutable_data<T>(
             framework::make_ddim({share_num, 1, feat_num}), context.GetPlace(), 0);
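The kernel obtains the global minimum through the negate-then-max identity min(x) = -max(-x) (the `neg` call above), so swapping `max_pooling(..., nullptr)` for `max(...)` changes only the entry point, not the result. A plaintext check of the identity with made-up values:

```python
import numpy as np

# per-feature local minima reported by two parties (made-up numbers)
local_min = np.array([[1.0, -2.0],
                      [0.5, -1.0]])
neg_min_global = np.max(-local_min, axis=0, keepdims=True)  # max of negations
global_min = -neg_min_global
assert np.array_equal(global_min, np.min(local_min, axis=0, keepdims=True))
print(global_min)   # [[ 0.5 -2. ]]
```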
layers/__init__.py:

@@ -37,8 +37,6 @@ from . import rnn
 from .rnn import *
 from . import metric_op
 from .metric_op import *
-from . import data_preprocessing
-from .data_preprocessing import *
 
 __all__ = []
 __all__ += basic.__all__
@@ -48,4 +46,3 @@ __all__ += ml.__all__
 __all__ += compare.__all__
 __all__ += conv.__all__
 __all__ += metric_op.__all__
-__all__ += data_preprocessing.__all__
data_preprocessing.py (deleted; its mean_normalize moves to ml.py below):

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
mpc data preprocessing op layers.
"""
from paddle.fluid.data_feeder import check_type, check_dtype
from ..framework import check_mpc_variable_and_dtype
from ..mpc_layer_helper import MpcLayerHelper
from .math import reduce_sum
__all__ = ['mean_normalize']
def mean_normalize(f_min, f_max, f_mean, sample_num):
'''
Mean normalization is a method used to normalize the range of independent
variables or features of data.
Refer to:
https://en.wikipedia.org/wiki/Feature_scaling#Mean_normalization
Args:
f_min (Variable): A 2-D tensor with shape [P, N], where P is the party
num and N is the feature num. Each row contains the
local min feature val of N features.
f_max (Variable): A 2-D tensor with shape [P, N], where P is the party
num and N is the feature num. Each row contains the
local max feature val of N features.
f_mean (Variable): A 2-D tensor with shape [P, N], where P is the party
num and N is the feature num. Each row contains the
local mean feature val of N features.
sample_num (Variable): A 1-D tensor with shape [P], where P is the
party num. Each element contains sample num
of party_i.
Returns:
f_range (Variable): A 1-D tensor with shape [N], where N is the
feature num. Each element contains global
range of feature_i.
f_mean_out (Variable): A 1-D tensor with shape [N], where N is the
feature num. Each element contains global
mean of feature_i.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle_fl.mpc as pfl_mpc
pfl_mpc.init("aby3", role, "localhost", redis_server, redis_port)
# 2 for share, 4 for 4 party, 100 for feat_num
input_size = [2, 4, 100]
mi = pfl_mpc.data(name='mi', shape=input_size, dtype='int64')
ma = pfl_mpc.data(name='ma', shape=input_size, dtype='int64')
me = pfl_mpc.data(name='me', shape=input_size, dtype='int64')
sn = pfl_mpc.data(name='sn', shape=input_size[:-1], dtype='int64')
out0, out1 = pfl_mpc.layers.mean_normalize(f_min=mi, f_max=ma,
f_mean=me, sample_num=sn)
exe = fluid.Executor(place=fluid.CPUPlace())
# feed encrypted data
f_range, f_mean = exe.run(feed={'mi': f_min, 'ma': f_max,
'me': f_mean, 'sn': sample_num}, fetch_list=[out0, out1])
'''
helper = MpcLayerHelper("mean_normalize", **locals())
# dtype = helper.input_dtype()
dtype = 'int64'
check_dtype(dtype, 'f_min', ['int64'], 'mean_normalize')
check_dtype(dtype, 'f_max', ['int64'], 'mean_normalize')
check_dtype(dtype, 'f_mean', ['int64'], 'mean_normalize')
check_dtype(dtype, 'sample_num', ['int64'], 'mean_normalize')
f_range = helper.create_mpc_variable_for_type_inference(dtype=f_min.dtype)
f_mean_out = helper.create_mpc_variable_for_type_inference(dtype=f_min.dtype)
total_num = reduce_sum(sample_num)
op_type = 'mean_normalize'
helper.append_op(
type='mpc_' + op_type,
inputs={
"Min": f_min,
"Max": f_max,
"Mean": f_mean,
"SampleNum": sample_num,
"TotalNum": total_num,
},
outputs={
"Range": f_range,
"MeanOut": f_mean_out,
},
)
return f_range, f_mean_out
ml.py:

@@ -37,6 +37,7 @@ __all__ = [
     'pool2d',
     'batch_norm',
     'reshape',
+    'mean_normalize',
 ]
@@ -612,7 +613,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
     helper = MpcLayerHelper("reshape2", **locals())
     _helper = LayerHelper("reshape2", **locals())
-
+
     def get_new_shape_tensor(list_shape):
         new_shape_tensor = []
         for dim in list_shape:
@@ -625,7 +626,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
             fill_constant([1], 'int32', dim, force_cpu=True, out=temp_out)
             new_shape_tensor.append(temp_out)
         return new_shape_tensor
-
+
     def get_attr_shape(list_shape):
         unk_dim_idx = -1
         attrs_shape = []
@@ -662,13 +663,13 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
     assert len(shape) > 0, ("The size of 'shape' in reshape can't be zero, "
                             "but received %s." % len(shape))
     attrs["shape"] = get_attr_shape(shape)
-
+
     if utils._contain_var(shape):
         inputs['ShapeTensor'] = get_new_shape_tensor(shape)
     elif isinstance(actual_shape, Variable):
         actual_shape.stop_gradient = True
         inputs["Shape"] = actual_shape
-
+
     out = x if inplace else helper.create_mpc_variable_for_type_inference(
         dtype=x.dtype)
     x_shape = helper.create_mpc_variable_for_type_inference(dtype=x.dtype)
@@ -680,3 +681,92 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
                    "XShape": x_shape})
 
     return helper.append_activation(out)
+
+
+def mean_normalize(f_min, f_max, f_mean, sample_num):
+    '''
+    Mean normalization is a method used to normalize the range of independent
+    variables or features of data.
+    Refer to:
+    https://en.wikipedia.org/wiki/Feature_scaling#Mean_normalization
+
+    Args:
+        f_min (Variable): A 2-D tensor with shape [P, N], where P is the party
+                          num and N is the feature num. Each row contains the
+                          local min feature val of N features.
+        f_max (Variable): A 2-D tensor with shape [P, N], where P is the party
+                          num and N is the feature num. Each row contains the
+                          local max feature val of N features.
+        f_mean (Variable): A 2-D tensor with shape [P, N], where P is the party
+                           num and N is the feature num. Each row contains the
+                           local mean feature val of N features.
+        sample_num (Variable): A 1-D tensor with shape [P], where P is the
+                               party num. Each element contains sample num
+                               of party_i.
+
+    Returns:
+        f_range (Variable): A 1-D tensor with shape [N], where N is the
+                            feature num. Each element contains global
+                            range of feature_i.
+        f_mean_out (Variable): A 1-D tensor with shape [N], where N is the
+                               feature num. Each element contains global
+                               mean of feature_i.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+            import paddle_fl.mpc as pfl_mpc
+
+            pfl_mpc.init("aby3", role, "localhost", redis_server, redis_port)
+
+            # 2 for share, 4 for 4 party, 100 for feat_num
+            input_size = [2, 4, 100]
+
+            mi = pfl_mpc.data(name='mi', shape=input_size, dtype='int64')
+            ma = pfl_mpc.data(name='ma', shape=input_size, dtype='int64')
+            me = pfl_mpc.data(name='me', shape=input_size, dtype='int64')
+            sn = pfl_mpc.data(name='sn', shape=input_size[:-1], dtype='int64')
+
+            out0, out1 = pfl_mpc.layers.mean_normalize(f_min=mi, f_max=ma,
+                                                       f_mean=me, sample_num=sn)
+
+            exe = fluid.Executor(place=fluid.CPUPlace())
+
+            # feed encrypted data
+            f_range, f_mean = exe.run(feed={'mi': f_min, 'ma': f_max,
+                'me': f_mean, 'sn': sample_num}, fetch_list=[out0, out1])
+    '''
+    helper = MpcLayerHelper("mean_normalize", **locals())
+
+    # dtype = helper.input_dtype()
+    dtype = 'int64'
+
+    check_dtype(dtype, 'f_min', ['int64'], 'mean_normalize')
+    check_dtype(dtype, 'f_max', ['int64'], 'mean_normalize')
+    check_dtype(dtype, 'f_mean', ['int64'], 'mean_normalize')
+    check_dtype(dtype, 'sample_num', ['int64'], 'mean_normalize')
+
+    f_range = helper.create_mpc_variable_for_type_inference(dtype=f_min.dtype)
+    f_mean_out = helper.create_mpc_variable_for_type_inference(dtype=f_min.dtype)
+
+    # to avoid circular dependencies
+    from .math import reduce_sum
+
+    total_num = reduce_sum(sample_num)
+
+    op_type = 'mean_normalize'
+
+    helper.append_op(
+        type='mpc_' + op_type,
+        inputs={
+            "Min": f_min,
+            "Max": f_max,
+            "Mean": f_mean,
+            "SampleNum": sample_num,
+            "TotalNum": total_num,
+        },
+        outputs={
+            "Range": f_range,
+            "MeanOut": f_mean_out,
+        },
+    )
+
+    return f_range, f_mean_out
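For reference, this is the arithmetic the mpc_mean_normalize op performs on the pooled statistics, shown in the clear (a sketch: the real kernel runs on secret shares, and the weighting of the local means by SampleNum/TotalNum is inferred from the op's inputs):

```python
import numpy as np

# made-up local statistics for P=2 parties, N=2 features
f_min  = np.array([[0.0, 1.0], [0.5, 0.0]])   # [P, N] local minima
f_max  = np.array([[4.0, 9.0], [6.0, 8.0]])   # [P, N] local maxima
f_mean = np.array([[2.0, 5.0], [3.0, 4.0]])   # [P, N] local means
n      = np.array([100, 300])                 # [P]    samples per party

f_range    = f_max.max(axis=0) - f_min.min(axis=0)         # global max - min
f_mean_out = (f_mean * n[:, None]).sum(axis=0) / n.sum()   # sample-weighted mean
print(f_range)      # [6. 9.]
print(f_mean_out)   # [2.75 4.25]

# a feature x is then scaled as (x - f_mean_out) / f_range,
# the mean-normalization formula referenced in the docstring
```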