提交 432d302a 编写于 作者: M mindspore-ci-bot 提交者: Gitee

!108 remove parameter 'iid' in GridSearchCV. Increase support for parallel...

!108 remove parameter 'iid' in GridSearchCV. Increase support for parallel training. Modify ut case. Add attack config verification.
Merge pull request !108 from liuluobin/master
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Verify attack config
"""
from mindarmour.utils.logger import LogUtil
LOGGER = LogUtil.get_instance()
TAG = "check_params"
def _is_positive_int(item):
"""
Verify that the value is a positive integer.
"""
if not isinstance(item, int) or item <= 0:
return False
return True
def _is_non_negative_int(item):
"""
Verify that the value is a non-negative integer.
"""
if not isinstance(item, int) or item < 0:
return False
return True
def _is_positive_float(item):
"""
Verify that value is a positive number.
"""
if not isinstance(item, (int, float)) or item <= 0:
return False
return True
def _is_non_negative_float(item):
"""
Verify that value is a non-negative number.
"""
if not isinstance(item, (int, float)) or item < 0:
return False
return True
def _is_positive_int_tuple(item):
"""
Verify that the input parameter is a positive integer tuple.
"""
if not isinstance(item, tuple):
return False
for i in item:
if not _is_positive_int(i):
return False
return True
def _is_dict(item):
"""
Check whether the type is dict.
"""
return isinstance(item, dict)
VALID_PARAMS_DICT = {
"knn": {
"n_neighbors": [_is_positive_int],
"weights": [{"uniform", "distance"}],
"algorithm": [{"auto", "ball_tree", "kd_tree", "brute"}],
"leaf_size": [_is_positive_int],
"p": [_is_positive_int],
"metric": None,
"metric_params": None,
},
"lr": {
"penalty": [{"l1", "l2", "elasticnet", "none"}],
"dual": [{True, False}],
"tol": [_is_positive_float],
"C": [_is_positive_float],
"fit_intercept": [{True, False}],
"intercept_scaling": [_is_positive_float],
"class_weight": [{"balanced", None}, _is_dict],
"random_state": None,
"solver": [{"newton-cg", "lbfgs", "liblinear", "sag", "saga"}]
},
"mlp": {
"hidden_layer_sizes": [_is_positive_int_tuple],
"activation": [{"identity", "logistic", "tanh", "relu"}],
"solver": {"lbfgs", "sgd", "adam"},
"alpha": [_is_positive_float],
"batch_size": [{"auto"}, _is_positive_int],
"learning_rate": [{"constant", "invscaling", "adaptive"}],
"learning_rate_init": [_is_positive_float],
"power_t": [_is_positive_float],
"max_iter": [_is_positive_int],
"shuffle": [{True, False}],
"random_state": None,
"tol": [_is_positive_float],
"verbose": [{True, False}],
"warm_start": [{True, False}],
"momentum": [_is_positive_float],
"nesterovs_momentum": [{True, False}],
"early_stopping": [{True, False}],
"validation_fraction": [_is_positive_float],
"beta_1": [_is_positive_float],
"beta_2": [_is_positive_float],
"epsilon": [_is_positive_float],
"n_iter_no_change": [_is_positive_int],
"max_fun": [_is_positive_int]
},
"rf": {
"n_estimators": [_is_positive_int],
"criterion": [{"gini", "entropy"}],
"max_depth": [_is_positive_int],
"min_samples_split": [_is_positive_float],
"min_samples_leaf": [_is_positive_float],
"min_weight_fraction_leaf": [_is_non_negative_float],
"max_features": [{"auto", "sqrt", "log2", None}, _is_positive_float],
"max_leaf_nodes": [_is_positive_int, {None}],
"min_impurity_decrease": {_is_non_negative_float},
"min_impurity_split": [{None}, _is_positive_float],
"bootstrap": [{True, False}],
"oob_scroe": [{True, False}],
"n_jobs": [_is_positive_int, {None}],
"random_state": None,
"verbose": [_is_non_negative_int],
"warm_start": [{True, False}],
"class_weight": None,
"ccp_alpha": [_is_non_negative_float],
"max_samples": [_is_positive_float]
}
}
def _check_config(config_list, check_params):
"""
Verify that config_list is valid.
Check_params is the valid value range of the parameter.
"""
if not isinstance(config_list, (list, tuple)):
msg = "Type of parameter 'config_list' must be list, but got {}.".format(type(config_list))
LOGGER.error(TAG, msg)
raise TypeError(msg)
for config in config_list:
if not isinstance(config, dict):
msg = "Type of each config in config_list must be dict, but got {}.".format(type(config))
LOGGER.error(TAG, msg)
raise TypeError(msg)
if set(config.keys()) != {"params", "method"}:
msg = "Keys of each config in config_list must be {}," \
"but got {}.".format({'method', 'params'}, set(config.keys()))
LOGGER.error(TAG, msg)
raise KeyError(msg)
method = str.lower(config["method"])
params = config["params"]
if method not in check_params.keys():
msg = "Method {} is not supported.".format(method)
LOGGER.error(TAG, msg)
raise ValueError(msg)
if not params.keys() <= check_params[method].keys():
msg = "Params in method {} is not accepted, the parameters " \
"that can be set are {}.".format(method, set(check_params[method].keys()))
LOGGER.error(TAG, msg)
raise KeyError(msg)
for param_key in params.keys():
param_value = params[param_key]
candidate_values = check_params[method][param_key]
if not isinstance(param_value, list):
msg = "The parameter '{}' in method '{}' setting must within the range of " \
"changeable parameters.".format(param_key, method)
LOGGER.error(TAG, msg)
raise ValueError(msg)
if candidate_values is None:
continue
for item_value in param_value:
flag = False
for candidate_value in candidate_values:
if isinstance(candidate_value, set) and item_value in candidate_value:
flag = True
break
elif candidate_value(item_value):
flag = True
break
if not flag:
msg = "Setting of parmeter {} in method {} is invalid".format(param_key, method)
raise ValueError(msg)
def check_config_params(config_list):
"""
External interfaces to verify attack config.
"""
_check_config(config_list, VALID_PARAMS_DICT)
......@@ -27,7 +27,7 @@ LOGGER = LogUtil.get_instance()
TAG = "Attacker"
def _attack_knn(features, labels, param_grid):
def _attack_knn(features, labels, param_grid, n_jobs):
"""
Train and return a KNN model.
......@@ -35,20 +35,21 @@ def _attack_knn(features, labels, param_grid):
features (numpy.ndarray): Loss and logits characteristics of each sample.
labels (numpy.ndarray): Labels of each sample whether belongs to training set.
param_grid (dict): Setting of GridSearchCV.
n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
otherwise the value of n_jobs must be a positive integer.
Returns:
sklearn.model_selection.GridSearchCV, trained model.
"""
knn_model = KNeighborsClassifier()
knn_model = GridSearchCV(
knn_model, param_grid=param_grid, cv=3, n_jobs=1, iid=False,
verbose=0,
knn_model, param_grid=param_grid, cv=3, n_jobs=n_jobs, verbose=0,
)
knn_model.fit(X=features, y=labels)
return knn_model
def _attack_lr(features, labels, param_grid):
def _attack_lr(features, labels, param_grid, n_jobs):
"""
Train and return a LR model.
......@@ -56,20 +57,21 @@ def _attack_lr(features, labels, param_grid):
features (numpy.ndarray): Loss and logits characteristics of each sample.
labels (numpy.ndarray): Labels of each sample whether belongs to training set.
param_grid (dict): Setting of GridSearchCV.
n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
otherwise the value of n_jobs must be a positive integer.
Returns:
sklearn.model_selection.GridSearchCV, trained model.
"""
lr_model = LogisticRegression(C=1.0, penalty="l2", max_iter=1000)
lr_model = LogisticRegression(C=1.0, penalty="l2", max_iter=300)
lr_model = GridSearchCV(
lr_model, param_grid=param_grid, cv=3, n_jobs=1, iid=False,
verbose=0,
lr_model, param_grid=param_grid, cv=3, n_jobs=n_jobs, verbose=0,
)
lr_model.fit(X=features, y=labels)
return lr_model
def _attack_mlpc(features, labels, param_grid):
def _attack_mlpc(features, labels, param_grid, n_jobs):
"""
Train and return a MLPC model.
......@@ -77,20 +79,21 @@ def _attack_mlpc(features, labels, param_grid):
features (numpy.ndarray): Loss and logits characteristics of each sample.
labels (numpy.ndarray): Labels of each sample whether belongs to training set.
param_grid (dict): Setting of GridSearchCV.
n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
otherwise the value of n_jobs must be a positive integer.
Returns:
sklearn.model_selection.GridSearchCV, trained model.
"""
mlpc_model = MLPClassifier(random_state=1, max_iter=300)
mlpc_model = GridSearchCV(
mlpc_model, param_grid=param_grid, cv=3, n_jobs=1, iid=False,
verbose=0,
mlpc_model, param_grid=param_grid, cv=3, n_jobs=n_jobs, verbose=0,
)
mlpc_model.fit(features, labels)
return mlpc_model
def _attack_rf(features, labels, random_grid):
def _attack_rf(features, labels, random_grid, n_jobs):
"""
Train and return a RF model.
......@@ -98,20 +101,22 @@ def _attack_rf(features, labels, random_grid):
features (numpy.ndarray): Loss and logits characteristics of each sample.
labels (numpy.ndarray): Labels of each sample whether belongs to training set.
random_grid (dict): Setting of RandomizedSearchCV.
n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
otherwise the value of n_jobs must be a positive integer.
Returns:
sklearn.model_selection.RandomizedSearchCV, trained model.
"""
rf_model = RandomForestClassifier(max_depth=2, random_state=0)
rf_model = RandomizedSearchCV(
rf_model, param_distributions=random_grid, n_iter=7, cv=3, n_jobs=1,
iid=False, verbose=0,
rf_model, param_distributions=random_grid, n_iter=7, cv=3, n_jobs=n_jobs,
verbose=0,
)
rf_model.fit(features, labels)
return rf_model
def get_attack_model(features, labels, config):
def get_attack_model(features, labels, config, n_jobs=-1):
"""
Get trained attack model specify by config.
......@@ -123,6 +128,8 @@ def get_attack_model(features, labels, config):
params of each method must within the range of changeable parameters.
Tips of params implement can be found in
"https://scikit-learn.org/0.16/modules/generated/sklearn.grid_search.GridSearchCV.html".
n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
otherwise the value of n_jobs must be a positive integer.
Returns:
sklearn.BaseEstimator, trained model specify by config["method"].
......@@ -136,13 +143,13 @@ def get_attack_model(features, labels, config):
method = str.lower(config["method"])
if method == "knn":
return _attack_knn(features, labels, config["params"])
return _attack_knn(features, labels, config["params"], n_jobs)
if method == "lr":
return _attack_lr(features, labels, config["params"])
return _attack_lr(features, labels, config["params"], n_jobs)
if method == "mlp":
return _attack_mlpc(features, labels, config["params"])
return _attack_mlpc(features, labels, config["params"], n_jobs)
if method == "rf":
return _attack_rf(features, labels, config["params"])
return _attack_rf(features, labels, config["params"], n_jobs)
msg = "Method {} is not supported.".format(config["method"])
LOGGER.error(TAG, msg)
......
......@@ -15,14 +15,16 @@
Membership Inference
"""
from multiprocessing import cpu_count
import numpy as np
import mindspore as ms
from mindspore.train import Model
from mindspore.dataset.engine import Dataset
from mindspore import Tensor
from mindarmour.diff_privacy.evaluation.attacker import get_attack_model
from mindarmour.utils.logger import LogUtil
from .attacker import get_attack_model
from ._check_config import check_config_params
LOGGER = LogUtil.get_instance()
TAG = "MembershipInference"
......@@ -101,13 +103,15 @@ class MembershipInference:
Args:
model (Model): Target model.
n_jobs (int): Number of jobs run in parallel. -1 means using all processors,
otherwise the value of n_jobs must be a positive integer.
Examples:
>>> train_1, train_2 are non-overlapping datasets from training dataset of target model.
>>> test_1, test_2 are non-overlapping datasets from test dataset of target model.
>>> We use train_1, test_1 to train attack model, and use train_2, test_2 to evaluate attack model.
>>> model = Model(network=net, loss_fn=loss, optimizer=opt, metrics={'acc', 'loss'})
>>> inference_model = MembershipInference(model)
>>> inference_model = MembershipInference(model, n_jobs=-1)
>>> config = [{"method": "KNN", "params": {"n_neighbors": [3, 5, 7]}}]
>>> inference_model.train(train_1, test_1, config)
>>> metrics = ["precision", "recall", "accuracy"]
......@@ -115,15 +119,26 @@ class MembershipInference:
Raises:
TypeError: If type of model is not mindspore.train.Model.
TypeError: If type of n_jobs is not int.
ValueError: The value of n_jobs is neither -1 nor a positive integer.
"""
def __init__(self, model):
def __init__(self, model, n_jobs=-1):
if not isinstance(model, Model):
msg = "Type of parameter 'model' must be Model, but got {}.".format(type(model))
LOGGER.error(TAG, msg)
raise TypeError(msg)
if not isinstance(n_jobs, int):
msg = "Type of parameter 'n_jobs' must be int, but got {}".format(type(n_jobs))
LOGGER.error(TAG, msg)
raise TypeError(msg)
if not (n_jobs == -1 or n_jobs > 0):
msg = "Value of n_jobs must be either -1 or positive integer, but got {}.".format(n_jobs)
LOGGER.error(TAG, msg)
raise ValueError(msg)
self.model = model
self.n_jobs = min(n_jobs, cpu_count())
self.method_list = ["knn", "lr", "mlp", "rf"]
self.attack_list = []
......@@ -162,24 +177,13 @@ class MembershipInference:
LOGGER.error(TAG, msg)
raise TypeError(msg)
for config in attack_config:
if not isinstance(config, dict):
msg = "Type of each config in 'attack_config' must be dict, but got {}.".format(type(config))
LOGGER.error(TAG, msg)
raise TypeError(msg)
if {"params", "method"} != set(config.keys()):
msg = "Each config in attack_config must have keys 'method' and 'params'," \
"but your key value is {}.".format(set(config.keys()))
LOGGER.error(TAG, msg)
raise KeyError(msg)
if str.lower(config["method"]) not in self.method_list:
msg = "Method {} is not support.".format(config["method"])
LOGGER.error(TAG, msg)
raise ValueError(msg)
check_config_params(attack_config) # Verify attack config.
features, labels = self._transform(dataset_train, dataset_test)
for config in attack_config:
self.attack_list.append(get_attack_model(features, labels, config))
self.attack_list.append(get_attack_model(features, labels, config, n_jobs=self.n_jobs))
def eval(self, dataset_train, dataset_test, metrics):
"""
......
......@@ -35,7 +35,7 @@ def test_get_knn_model():
"n_neighbors": [3],
}
}
knn_attacker = get_attack_model(features, labels, config_knn)
knn_attacker = get_attack_model(features, labels, config_knn, -1)
pred = knn_attacker.predict(features)
assert pred is not None
......@@ -54,7 +54,7 @@ def test_get_lr_model():
"C": np.logspace(-4, 2, 10),
}
}
lr_attacker = get_attack_model(features, labels, config_lr)
lr_attacker = get_attack_model(features, labels, config_lr, -1)
pred = lr_attacker.predict(features)
assert pred is not None
......@@ -75,7 +75,7 @@ def test_get_mlp_model():
"alpha": [0.0001, 0.001, 0.01],
}
}
mlpc_attacker = get_attack_model(features, labels, config_mlpc)
mlpc_attacker = get_attack_model(features, labels, config_mlpc, -1)
pred = mlpc_attacker.predict(features)
assert pred is not None
......@@ -98,6 +98,6 @@ def test_get_rf_model():
"min_samples_leaf": [1, 2, 4],
}
}
rf_attacker = get_attack_model(features, labels, config_rf)
rf_attacker = get_attack_model(features, labels, config_rf, -1)
pred = rf_attacker.predict(features)
assert pred is not None
......@@ -24,6 +24,7 @@ import numpy as np
import mindspore.dataset as ds
from mindspore import nn
from mindspore.train import Model
import mindspore.context as context
from mindarmour.diff_privacy.evaluation.membership_inference import MembershipInference
......@@ -31,6 +32,8 @@ sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../"))
from defenses.mock_net import Net
context.set_context(mode=context.GRAPH_MODE)
def dataset_generator(batch_size, batches):
"""mock training data."""
data = np.random.randn(batches*batch_size, 1, 32, 32).astype(
......@@ -51,7 +54,7 @@ def test_get_membership_inference_object():
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
opt = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
model = Model(network=net, loss_fn=loss, optimizer=opt)
inference_model = MembershipInference(model)
inference_model = MembershipInference(model, -1)
assert isinstance(inference_model, MembershipInference)
......@@ -65,7 +68,7 @@ def test_membership_inference_object_train():
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
opt = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
model = Model(network=net, loss_fn=loss, optimizer=opt)
inference_model = MembershipInference(model)
inference_model = MembershipInference(model, -1)
assert isinstance(inference_model, MembershipInference)
config = [{
......@@ -95,7 +98,7 @@ def test_membership_inference_eval():
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
opt = nn.Momentum(params=net.trainable_params(), learning_rate=0.1, momentum=0.9)
model = Model(network=net, loss_fn=loss, optimizer=opt)
inference_model = MembershipInference(model)
inference_model = MembershipInference(model, -1)
assert isinstance(inference_model, MembershipInference)
batch_size = 16
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册