提交 da3e8728 编写于 作者: Z zhenghuanhuan

Fix the `dont't support Closure with free variable yet` error that was raised when mechanisms parameters were updated.

Update code according to review suggestions.

fix pylint warnings.
上级 98b88e55
# Release 0.5.0-beta
## Major Features and Improvements
### Differential privacy model training
* Optimizers with differential privacy
* Differential privacy model training now supports both Pynative mode and graph mode.
* Graph mode is recommended for its performance.
## Bugfixes
## Contributors
Thanks go to these wonderful people:
Liu Liu, Huanhuan Zheng, Xiulang Jin, Zhidan Liu.
Contributions of any kind are welcome!
# Release 0.3.0-alpha # Release 0.3.0-alpha
## Major Features and Improvements ## Major Features and Improvements
......
...@@ -476,7 +476,6 @@ class DiverseInputIterativeMethod(BasicIterativeMethod): ...@@ -476,7 +476,6 @@ class DiverseInputIterativeMethod(BasicIterativeMethod):
is_targeted=is_targeted, is_targeted=is_targeted,
nb_iter=nb_iter, nb_iter=nb_iter,
loss_fn=loss_fn) loss_fn=loss_fn)
# FGSM default alpha is None equal alpha=1
self.prob = check_param_type('prob', prob, float) self.prob = check_param_type('prob', prob, float)
......
...@@ -19,7 +19,6 @@ from abc import abstractmethod ...@@ -19,7 +19,6 @@ from abc import abstractmethod
from mindspore import Tensor from mindspore import Tensor
from mindspore.nn import Cell from mindspore.nn import Cell
from mindspore.ops import operations as P from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common.parameter import Parameter from mindspore.common.parameter import Parameter
from mindspore.common import dtype as mstype from mindspore.common import dtype as mstype
...@@ -124,6 +123,8 @@ class GaussianRandom(Mechanisms): ...@@ -124,6 +123,8 @@ class GaussianRandom(Mechanisms):
seed(int): Original random seed, if seed=0 random normal will use secure seed(int): Original random seed, if seed=0 random normal will use secure
random number. IF seed!=0 random normal will generate values using random number. IF seed!=0 random normal will generate values using
given seed. Default: 0. given seed. Default: 0.
policy(str): Mechanisms parameters update policy. Default: None, no
parameters need update.
Returns: Returns:
Tensor, generated noise with shape like given gradients. Tensor, generated noise with shape like given gradients.
...@@ -137,7 +138,7 @@ class GaussianRandom(Mechanisms): ...@@ -137,7 +138,7 @@ class GaussianRandom(Mechanisms):
>>> print(res) >>> print(res)
""" """
def __init__(self, norm_bound=0.5, initial_noise_multiplier=1.5, seed=0): def __init__(self, norm_bound=0.5, initial_noise_multiplier=1.5, seed=0, policy=None):
super(GaussianRandom, self).__init__() super(GaussianRandom, self).__init__()
self._norm_bound = check_value_positive('norm_bound', norm_bound) self._norm_bound = check_value_positive('norm_bound', norm_bound)
self._norm_bound = Tensor(norm_bound, mstype.float32) self._norm_bound = Tensor(norm_bound, mstype.float32)
...@@ -146,6 +147,7 @@ class GaussianRandom(Mechanisms): ...@@ -146,6 +147,7 @@ class GaussianRandom(Mechanisms):
self._initial_noise_multiplier = Tensor(initial_noise_multiplier, mstype.float32) self._initial_noise_multiplier = Tensor(initial_noise_multiplier, mstype.float32)
self._mean = Tensor(0, mstype.float32) self._mean = Tensor(0, mstype.float32)
self._normal = P.Normal(seed=seed) self._normal = P.Normal(seed=seed)
self._decay_policy = policy
def construct(self, gradients): def construct(self, gradients):
""" """
...@@ -218,14 +220,8 @@ class AdaGaussianRandom(Mechanisms): ...@@ -218,14 +220,8 @@ class AdaGaussianRandom(Mechanisms):
raise NameError("The decay_policy must be in ['Time', 'Step'], but " raise NameError("The decay_policy must be in ['Time', 'Step'], but "
"get {}".format(decay_policy)) "get {}".format(decay_policy))
self._decay_policy = decay_policy self._decay_policy = decay_policy
self._sub = P.Sub()
self._mul = P.Mul() self._mul = P.Mul()
self._add = P.TensorAdd()
self._div = P.Div()
self._dtype = mstype.float32
self._normal = P.Normal(seed=seed) self._normal = P.Normal(seed=seed)
self._assign = P.Assign()
self._one = Tensor(1, self._dtype)
def construct(self, gradients): def construct(self, gradients):
""" """
...@@ -239,14 +235,48 @@ class AdaGaussianRandom(Mechanisms): ...@@ -239,14 +235,48 @@ class AdaGaussianRandom(Mechanisms):
""" """
shape = P.Shape()(gradients) shape = P.Shape()(gradients)
noise = self._normal(shape, self._mean, self._mul(self._noise_multiplier, self._norm_bound)) noise = self._normal(shape, self._mean, self._mul(self._noise_multiplier, self._norm_bound))
return noise
if self._decay_policy == 'Time':
temp = self._div(self._initial_noise_multiplier,
self._noise_multiplier)
temp = self._add(temp, self._noise_decay_rate)
multiplier = self._assign(self._noise_multiplier, self._div(self._initial_noise_multiplier, temp))
else:
temp = self._sub(self._one, self._noise_decay_rate)
multiplier = self._assign(self._noise_multiplier, self._mul(temp, self._noise_multiplier))
return F.depend(noise, multiplier) class _MechanismsParamsUpdater(Cell):
"""
Update mechanisms parameters, the parameters will refresh in train period.
Args:
policy(str): Pass in by the mechanisms class, mechanisms parameters update policy.
decay_rate(Tensor): Pass in by the mechanisms class, hyper parameter for controlling the decay size.
cur_params(Parameter): Pass in by the mechanisms class, current params value in this time.
init_params(Parameter):Pass in by the mechanisms class, initial params value to be updated.
Returns:
Tuple, next params value.
"""
def __init__(self, policy, decay_rate, cur_params, init_params):
    """
    Store the update policy, the values it works on and the operators
    that `construct` applies to them.

    Args:
        policy(str): Mechanisms parameters update policy. `construct`
            takes the 'Time' branch when it equals 'Time' and the
            multiplicative branch otherwise.
        decay_rate(Tensor): Hyper parameter for controlling the decay size.
        cur_params(Parameter): Current params value; `construct` assigns
            the updated value to it.
        init_params(Parameter): Initial params value to be updated.
    """
    super(_MechanismsParamsUpdater, self).__init__()
    self._policy = policy
    self._decay_rate = decay_rate
    self._cur_params = cur_params
    self._init_params = init_params

    # `construct` uses `_div` for both ratios of the 'Time' policy:
    # init / (init / cur + decay_rate). It therefore has to be a division
    # operator; binding it to P.Sub() silently turned the update into
    # init - (init - cur + decay_rate).
    self._div = P.Div()
    self._add = P.TensorAdd()
    self._assign = P.Assign()
    self._sub = P.Sub()
    self._one = Tensor(1, mstype.float32)
    self._mul = P.Mul()
def construct(self):
    """
    Compute the next value of the mechanism parameter and assign it to
    `self._cur_params`.

    When `self._policy` is 'Time', the new value is built from
    `self._init_params`, `self._cur_params` and `self._decay_rate` with
    `self._div` and `self._add`. For every other policy the current value
    is multiplied by `self._one - self._decay_rate`.

    Returns:
        The output of `self._assign` on `self._cur_params`, i.e. the
        parameter value for the next step.
    """
    if self._policy == 'Time':
        # Intended formula: init / (init / cur + decay_rate).
        # NOTE(review): this relies on `self._div` being a division
        # operator -- confirm the binding in `__init__`.
        temp = self._div(self._init_params, self._cur_params)
        temp = self._add(temp, self._decay_rate)
        next_params = self._assign(self._cur_params, self._div(self._init_params, temp))
    else:
        # Multiplicative decay: cur * (1 - decay_rate).
        temp = self._sub(self._one, self._decay_rate)
        next_params = self._assign(self._cur_params, self._mul(temp, self._cur_params))
    return next_params
...@@ -21,9 +21,14 @@ from mindspore.ops import operations as P ...@@ -21,9 +21,14 @@ from mindspore.ops import operations as P
from mindspore.ops import functional as F from mindspore.ops import functional as F
from mindspore.common import dtype as mstype from mindspore.common import dtype as mstype
from mindarmour.diff_privacy.mechanisms.mechanisms import MechanismsFactory from mindarmour.utils.logger import LogUtil
from mindarmour.diff_privacy import MechanismsFactory
from mindarmour.diff_privacy.mechanisms.mechanisms import _MechanismsParamsUpdater
from mindarmour.utils._check_param import check_int_positive from mindarmour.utils._check_param import check_int_positive
LOGGER = LogUtil.get_instance()
TAG = 'DP optimizer'
_grad_scale = C.MultitypeFuncGraph("grad_scale") _grad_scale = C.MultitypeFuncGraph("grad_scale")
_reciprocal = P.Reciprocal() _reciprocal = P.Reciprocal()
...@@ -97,7 +102,9 @@ class DPOptimizerClassFactory: ...@@ -97,7 +102,9 @@ class DPOptimizerClassFactory:
if policy == 'Adam': if policy == 'Adam':
cls = self._get_dp_optimizer_class(nn.Adam, self.mech, self._micro_batches, *args, **kwargs) cls = self._get_dp_optimizer_class(nn.Adam, self.mech, self._micro_batches, *args, **kwargs)
return cls return cls
raise NameError("The {} is not implement, please choose ['SGD', 'Momentum', 'Adam']".format(policy)) msg = "The {} is not implement, please choose ['SGD', 'Momentum', 'Adam']".format(policy)
LOGGER.error(TAG, msg)
raise NameError(msg)
def _get_dp_optimizer_class(self, cls, mech, micro_batches): def _get_dp_optimizer_class(self, cls, mech, micro_batches):
""" """
...@@ -119,6 +126,14 @@ class DPOptimizerClassFactory: ...@@ -119,6 +126,14 @@ class DPOptimizerClassFactory:
self._hyper_map = C.HyperMap() self._hyper_map = C.HyperMap()
self._micro_float = Tensor(micro_batches, mstype.float32) self._micro_float = Tensor(micro_batches, mstype.float32)
self._mech_param_updater = None
if self._mech is not None and self._mech._decay_policy is not None:
self._mech_param_updater = _MechanismsParamsUpdater(policy=self._mech._decay_policy,
decay_rate=self._mech._noise_decay_rate,
cur_params=self._mech._noise_multiplier,
init_params=
self._mech._initial_noise_multiplier)
def construct(self, gradients): def construct(self, gradients):
""" """
construct a compute flow. construct a compute flow.
...@@ -126,6 +141,10 @@ class DPOptimizerClassFactory: ...@@ -126,6 +141,10 @@ class DPOptimizerClassFactory:
grad_noise = self._hyper_map(self._mech, gradients) grad_noise = self._hyper_map(self._mech, gradients)
grads = self._tuple_add(gradients, grad_noise) grads = self._tuple_add(gradients, grad_noise)
grads = self._hyper_map(F.partial(_grad_scale, self._micro_float), grads) grads = self._hyper_map(F.partial(_grad_scale, self._micro_float), grads)
# update mech parameters
if self._mech_param_updater is not None:
multiplier = self._mech_param_updater()
grads = F.depend(grads, multiplier)
gradients = super(DPOptimizer, self).construct(grads) gradients = super(DPOptimizer, self).construct(grads)
return gradients return gradients
......
...@@ -47,10 +47,15 @@ from mindspore.nn.wrap.loss_scale import _grad_overflow ...@@ -47,10 +47,15 @@ from mindspore.nn.wrap.loss_scale import _grad_overflow
from mindspore.nn import Cell from mindspore.nn import Cell
from mindspore import ParameterTuple from mindspore import ParameterTuple
from mindarmour.utils.logger import LogUtil
from mindarmour.diff_privacy.mechanisms.mechanisms import _MechanismsParamsUpdater
from mindarmour.utils._check_param import check_param_type from mindarmour.utils._check_param import check_param_type
from mindarmour.utils._check_param import check_value_positive from mindarmour.utils._check_param import check_value_positive
from mindarmour.utils._check_param import check_int_positive from mindarmour.utils._check_param import check_int_positive
LOGGER = LogUtil.get_instance()
TAG = 'DP model'
GRADIENT_CLIP_TYPE = 1 GRADIENT_CLIP_TYPE = 1
_grad_scale = C.MultitypeFuncGraph("grad_scale") _grad_scale = C.MultitypeFuncGraph("grad_scale")
_reciprocal = P.Reciprocal() _reciprocal = P.Reciprocal()
...@@ -105,13 +110,19 @@ class DPModel(Model): ...@@ -105,13 +110,19 @@ class DPModel(Model):
norm_clip = check_param_type('norm_clip', norm_clip, float) norm_clip = check_param_type('norm_clip', norm_clip, float)
self._norm_clip = check_value_positive('norm_clip', norm_clip) self._norm_clip = check_value_positive('norm_clip', norm_clip)
if mech is not None and "DPOptimizer" in kwargs['optimizer'].__class__.__name__: if mech is not None and "DPOptimizer" in kwargs['optimizer'].__class__.__name__:
raise ValueError('DPOptimizer is not supported while mech is not None') msg = 'DPOptimizer is not supported while mech is not None'
LOGGER.error(TAG, msg)
raise ValueError(msg)
if mech is None: if mech is None:
if "DPOptimizer" in kwargs['optimizer'].__class__.__name__: if "DPOptimizer" in kwargs['optimizer'].__class__.__name__:
if context.get_context('mode') != context.PYNATIVE_MODE: if context.get_context('mode') != context.PYNATIVE_MODE:
raise ValueError('DPOptimizer just support pynative mode currently.') msg = 'DPOptimizer just support pynative mode currently.'
LOGGER.error(TAG, msg)
raise ValueError(msg)
else: else:
raise ValueError('DPModel should set mech or DPOptimizer configure, please refer to example.') msg = 'DPModel should set mech or DPOptimizer configure, please refer to example.'
LOGGER.error(TAG, msg)
raise ValueError(msg)
self._mech = mech self._mech = mech
super(DPModel, self).__init__(**kwargs) super(DPModel, self).__init__(**kwargs)
...@@ -163,10 +174,11 @@ class DPModel(Model): ...@@ -163,10 +174,11 @@ class DPModel(Model):
if update_cell is not None: if update_cell is not None:
# only cpu not support `TrainOneStepWithLossScaleCell` for control flow. # only cpu not support `TrainOneStepWithLossScaleCell` for control flow.
if not context.get_context("enable_ge") and context.get_context("device_target") == "CPU": if not context.get_context("enable_ge") and context.get_context("device_target") == "CPU":
raise ValueError("Only `loss_scale_manager=None` and " msg = "Only `loss_scale_manager=None` and `loss_scale_manager=FixedLossScaleManager(drop_overflow" \
"`loss_scale_manager=FixedLossScaleManager(drop_overflow_update=False)`" "_update=False)` are supported in current version. If you use `O2` option, please use " \
"are supported in current version. If you use `O2` option, please" "`loss_scale_manager=None` or `FixedLossScaleManager`"
"use `loss_scale_manager=None` or `FixedLossScaleManager`") LOGGER.error(TAG, msg)
raise ValueError(msg)
network = _TrainOneStepWithLossScaleCell(network, network = _TrainOneStepWithLossScaleCell(network,
optimizer, optimizer,
scale_update_cell=update_cell, scale_update_cell=update_cell,
...@@ -174,6 +186,7 @@ class DPModel(Model): ...@@ -174,6 +186,7 @@ class DPModel(Model):
norm_clip=self._norm_clip, norm_clip=self._norm_clip,
mech=self._mech).set_train() mech=self._mech).set_train()
return network return network
network = _TrainOneStepCell(network, network = _TrainOneStepCell(network,
optimizer, optimizer,
loss_scale, loss_scale,
...@@ -182,7 +195,8 @@ class DPModel(Model): ...@@ -182,7 +195,8 @@ class DPModel(Model):
mech=self._mech).set_train() mech=self._mech).set_train()
return network return network
def _build_train_network(self):
def _build_train_network(self):
"""Build train network""" """Build train network"""
network = self._network network = self._network
if self._micro_batches: if self._micro_batches:
...@@ -358,6 +372,13 @@ class _TrainOneStepWithLossScaleCell(Cell): ...@@ -358,6 +372,13 @@ class _TrainOneStepWithLossScaleCell(Cell):
self._hyper_map = C.HyperMap() self._hyper_map = C.HyperMap()
self._micro_float = Tensor(micro_batches, mstype.float32) self._micro_float = Tensor(micro_batches, mstype.float32)
self._mech_param_updater = None
if self._mech is not None and self._mech._decay_policy is not None:
self._mech_param_updater = _MechanismsParamsUpdater(policy=self._mech._decay_policy,
decay_rate=self._mech._noise_decay_rate,
cur_params=self._mech._noise_multiplier,
init_params=self._mech._initial_noise_multiplier)
def construct(self, data, label, sens=None): def construct(self, data, label, sens=None):
""" """
construct a compute flow. construct a compute flow.
...@@ -380,14 +401,14 @@ class _TrainOneStepWithLossScaleCell(Cell): ...@@ -380,14 +401,14 @@ class _TrainOneStepWithLossScaleCell(Cell):
record_labels = self._split(label) record_labels = self._split(label)
# first index # first index
loss = self.network(record_datas[0], record_labels[0]) loss = self.network(record_datas[0], record_labels[0])
scaling_sens_filled = C.ones_like(loss)*F.cast(scaling_sens, F.dtype(loss)) scaling_sens_filled = C.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss))
record_grad = self.grad(self.network, weights)(record_datas[0], record_labels[0], scaling_sens_filled) record_grad = self.grad(self.network, weights)(record_datas[0], record_labels[0], scaling_sens_filled)
record_grad = self._clip_by_global_norm(record_grad, GRADIENT_CLIP_TYPE, self._l2_norm) record_grad = self._clip_by_global_norm(record_grad, GRADIENT_CLIP_TYPE, self._l2_norm)
grads = record_grad grads = record_grad
total_loss = loss total_loss = loss
for i in range(1, self._micro_batches): for i in range(1, self._micro_batches):
loss = self.network(record_datas[i], record_labels[i]) loss = self.network(record_datas[i], record_labels[i])
scaling_sens_filled = C.ones_like(loss)*F.cast(scaling_sens, F.dtype(loss)) scaling_sens_filled = C.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss))
record_grad = self.grad(self.network, weights)(record_datas[i], record_labels[i], scaling_sens_filled) record_grad = self.grad(self.network, weights)(record_datas[i], record_labels[i], scaling_sens_filled)
record_grad = self._clip_by_global_norm(record_grad, GRADIENT_CLIP_TYPE, self._l2_norm) record_grad = self._clip_by_global_norm(record_grad, GRADIENT_CLIP_TYPE, self._l2_norm)
grads = self._tuple_add(grads, record_grad) grads = self._tuple_add(grads, record_grad)
...@@ -398,6 +419,10 @@ class _TrainOneStepWithLossScaleCell(Cell): ...@@ -398,6 +419,10 @@ class _TrainOneStepWithLossScaleCell(Cell):
grad_noise = self._hyper_map(self._mech, grads) grad_noise = self._hyper_map(self._mech, grads)
grads = self._tuple_add(grads, grad_noise) grads = self._tuple_add(grads, grad_noise)
grads = self._hyper_map(F.partial(_grad_scale, self._micro_float), grads) grads = self._hyper_map(F.partial(_grad_scale, self._micro_float), grads)
# update mech parameters
if self._mech_param_updater is not None:
multiplier = self._mech_param_updater()
loss = F.depend(loss, multiplier)
grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads) grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads)
# apply grad reducer on grads # apply grad reducer on grads
...@@ -474,6 +499,10 @@ class _TrainOneStepCell(Cell): ...@@ -474,6 +499,10 @@ class _TrainOneStepCell(Cell):
self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
# dp params # dp params
if micro_batches is None:
msg = 'micro_batches must give in differential privacy, but got value: {}'.format(micro_batches)
LOGGER.error(TAG, msg)
raise ValueError(msg)
self._micro_batches = micro_batches self._micro_batches = micro_batches
norm_clip = check_param_type('norm_clip', norm_clip, float) norm_clip = check_param_type('norm_clip', norm_clip, float)
self._l2_norm = check_value_positive('norm_clip', norm_clip) self._l2_norm = check_value_positive('norm_clip', norm_clip)
...@@ -484,6 +513,13 @@ class _TrainOneStepCell(Cell): ...@@ -484,6 +513,13 @@ class _TrainOneStepCell(Cell):
self._hyper_map = C.HyperMap() self._hyper_map = C.HyperMap()
self._micro_float = Tensor(micro_batches, mstype.float32) self._micro_float = Tensor(micro_batches, mstype.float32)
self._mech_param_updater = None
if self._mech is not None and self._mech._decay_policy is not None:
self._mech_param_updater = _MechanismsParamsUpdater(policy=self._mech._decay_policy,
decay_rate=self._mech._noise_decay_rate,
cur_params=self._mech._noise_multiplier,
init_params=self._mech._initial_noise_multiplier)
def construct(self, data, label): def construct(self, data, label):
""" """
construct a compute flow. construct a compute flow.
...@@ -510,6 +546,10 @@ class _TrainOneStepCell(Cell): ...@@ -510,6 +546,10 @@ class _TrainOneStepCell(Cell):
grad_noise = self._hyper_map(self._mech, grads) grad_noise = self._hyper_map(self._mech, grads)
grads = self._tuple_add(grads, grad_noise) grads = self._tuple_add(grads, grad_noise)
grads = self._hyper_map(F.partial(_grad_scale, self._micro_float), grads) grads = self._hyper_map(F.partial(_grad_scale, self._micro_float), grads)
# update mech parameters
if self._mech_param_updater is not None:
multiplier = self._mech_param_updater()
loss = F.depend(loss, multiplier)
if self.reducer_flag: if self.reducer_flag:
# apply grad reducer on grads # apply grad reducer on grads
......
...@@ -41,7 +41,7 @@ def dataset_generator(batch_size, batches): ...@@ -41,7 +41,7 @@ def dataset_generator(batch_size, batches):
@pytest.mark.platform_x86_ascend_training @pytest.mark.platform_x86_ascend_training
@pytest.mark.env_card @pytest.mark.env_card
@pytest.mark.component_mindarmour @pytest.mark.component_mindarmour
def test_dp_model_pynative_mode(): def test_dp_model_with_pynative_mode():
context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
norm_clip = 1.0 norm_clip = 1.0
initial_noise_multiplier = 0.01 initial_noise_multiplier = 0.01
...@@ -96,3 +96,33 @@ def test_dp_model_with_graph_mode(): ...@@ -96,3 +96,33 @@ def test_dp_model_with_graph_mode():
ms_ds = ds.GeneratorDataset(dataset_generator(batch_size, batches), ['data', 'label']) ms_ds = ds.GeneratorDataset(dataset_generator(batch_size, batches), ['data', 'label'])
ms_ds.set_dataset_size(batch_size * batches) ms_ds.set_dataset_size(batch_size * batches)
model.train(epochs, ms_ds, dataset_sink_mode=False) model.train(epochs, ms_ds, dataset_sink_mode=False)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_card
@pytest.mark.component_mindarmour
def test_dp_model_with_graph_mode_ada_gaussian():
    """
    Train LeNet5 through DPModel in graph mode with the 'AdaGaussian'
    mechanism created by MechanismsFactory.

    The test contains no assertions; it passes when training finishes
    without raising.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

    # The same bound is used for the mechanism's norm_bound and for the
    # model's norm_clip.
    clip_bound = 1.0
    noise_multiplier = 0.01

    lenet = LeNet5()
    batch_size, batches, epochs = 32, 128, 1

    loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    ada_mech = MechanismsFactory().create('AdaGaussian',
                                          norm_bound=clip_bound,
                                          initial_noise_multiplier=noise_multiplier)
    momentum_opt = nn.Momentum(lenet.trainable_params(), learning_rate=0.1, momentum=0.9)
    dp_model = DPModel(micro_batches=2,
                       norm_clip=clip_bound,
                       mech=ada_mech,
                       network=lenet,
                       loss_fn=loss_fn,
                       optimizer=momentum_opt,
                       metrics=None)

    train_set = ds.GeneratorDataset(dataset_generator(batch_size, batches), ['data', 'label'])
    train_set.set_dataset_size(batch_size * batches)
    dp_model.train(epochs, train_set, dataset_sink_mode=False)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册