Commit da3e8728 authored by: Z zhenghuanhuan

Fix updating mechanisms parameters causing a `don't support Closure with free variable yet` error.

Update code according to review suggestions.

Fix pylint warnings.
Parent 98b88e55
# Release 0.5.0-beta
## Major Features and Improvements
### Differential privacy model training
* Optimizers with differential privacy
* Differential privacy model training now supports both PyNative mode and graph mode.
* Graph mode is recommended for its better performance (see the usage sketch below).
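A minimal usage sketch, based on the graph-mode test case added in this commit. `LeNet5` and `dataset_generator` are placeholders taken from the test suite, and the import paths are assumed to match this version of mindarmour; adjust both to your own network, data pipeline and installation.

```python
import mindspore.nn as nn
import mindspore.dataset as ds
from mindspore import context
from mindarmour.diff_privacy import MechanismsFactory, DPModel

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")

network = LeNet5()  # placeholder network from the test suite
loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
net_opt = nn.Momentum(network.trainable_params(), learning_rate=0.1, momentum=0.9)

# 'Gaussian' keeps a fixed noise multiplier; 'AdaGaussian' decays it during training.
mech = MechanismsFactory().create('AdaGaussian',
                                  norm_bound=1.0,
                                  initial_noise_multiplier=0.01)

model = DPModel(micro_batches=2, norm_clip=1.0, mech=mech,
                network=network, loss_fn=loss, optimizer=net_opt, metrics=None)

ms_ds = ds.GeneratorDataset(dataset_generator(32, 128), ['data', 'label'])  # placeholder data
model.train(1, ms_ds, dataset_sink_mode=False)
```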
## Bugfixes
## Contributors
Thanks goes to these wonderful people:
Liu Liu, Huanhuan Zheng, Xiulang Jin, Zhidan Liu.
Contributions of any kind are welcome!
# Release 0.3.0-alpha
## Major Features and Improvements
......
......@@ -476,7 +476,6 @@ class DiverseInputIterativeMethod(BasicIterativeMethod):
is_targeted=is_targeted,
nb_iter=nb_iter,
loss_fn=loss_fn)
# For FGSM, the default alpha is None, which is equivalent to alpha=1.
self.prob = check_param_type('prob', prob, float)
......
......@@ -19,7 +19,6 @@ from abc import abstractmethod
from mindspore import Tensor
from mindspore.nn import Cell
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common.parameter import Parameter
from mindspore.common import dtype as mstype
......@@ -124,6 +123,8 @@ class GaussianRandom(Mechanisms):
seed(int): Original random seed. If seed=0, the random normal op uses a secure
random number; if seed!=0, it generates values from the given seed.
Default: 0.
policy(str): Mechanisms parameters update policy. Default: None, which means
no parameters need to be updated.
Returns:
Tensor, generated noise with shape like given gradients.
......@@ -137,7 +138,7 @@ class GaussianRandom(Mechanisms):
>>> print(res)
"""
def __init__(self, norm_bound=0.5, initial_noise_multiplier=1.5, seed=0):
def __init__(self, norm_bound=0.5, initial_noise_multiplier=1.5, seed=0, policy=None):
super(GaussianRandom, self).__init__()
self._norm_bound = check_value_positive('norm_bound', norm_bound)
self._norm_bound = Tensor(norm_bound, mstype.float32)
......@@ -146,6 +147,7 @@ class GaussianRandom(Mechanisms):
self._initial_noise_multiplier = Tensor(initial_noise_multiplier, mstype.float32)
self._mean = Tensor(0, mstype.float32)
self._normal = P.Normal(seed=seed)
self._decay_policy = policy
def construct(self, gradients):
"""
......@@ -218,14 +220,8 @@ class AdaGaussianRandom(Mechanisms):
raise NameError("The decay_policy must be in ['Time', 'Step'], but "
"get {}".format(decay_policy))
self._decay_policy = decay_policy
self._sub = P.Sub()
self._mul = P.Mul()
self._add = P.TensorAdd()
self._div = P.Div()
self._dtype = mstype.float32
self._normal = P.Normal(seed=seed)
self._assign = P.Assign()
self._one = Tensor(1, self._dtype)
def construct(self, gradients):
"""
......@@ -239,14 +235,48 @@ class AdaGaussianRandom(Mechanisms):
"""
shape = P.Shape()(gradients)
noise = self._normal(shape, self._mean, self._mul(self._noise_multiplier, self._norm_bound))
return noise
if self._decay_policy == 'Time':
temp = self._div(self._initial_noise_multiplier,
self._noise_multiplier)
temp = self._add(temp, self._noise_decay_rate)
multiplier = self._assign(self._noise_multiplier, self._div(self._initial_noise_multiplier, temp))
else:
temp = self._sub(self._one, self._noise_decay_rate)
multiplier = self._assign(self._noise_multiplier, self._mul(temp, self._noise_multiplier))
return F.depend(noise, multiplier)
class _MechanismsParamsUpdater(Cell):
"""
Update mechanisms parameters; the parameters are refreshed during training.
Args:
policy(str): Passed in by the mechanisms class, the parameters update policy.
decay_rate(Tensor): Passed in by the mechanisms class, a hyper-parameter controlling the decay size.
cur_params(Parameter): Passed in by the mechanisms class, the current parameter value.
init_params(Parameter): Passed in by the mechanisms class, the initial parameter value to be updated.
Returns:
Tuple, the next parameter value.
"""
def __init__(self, policy, decay_rate, cur_params, init_params):
super(_MechanismsParamsUpdater, self).__init__()
self._policy = policy
self._decay_rate = decay_rate
self._cur_params = cur_params
self._init_params = init_params
self._div = P.Div()  # division is required by the 'Time' decay policy below
self._add = P.TensorAdd()
self._assign = P.Assign()
self._sub = P.Sub()
self._one = Tensor(1, mstype.float32)
self._mul = P.Mul()
def construct(self):
"""
Update the parameters stored in `self._cur_params`.
Returns:
Tuple, the parameter value for the next step.
"""
if self._policy == 'Time':
temp = self._div(self._init_params, self._cur_params)
temp = self._add(temp, self._decay_rate)
next_params = self._assign(self._cur_params, self._div(self._init_params, temp))
else:
temp = self._sub(self._one, self._decay_rate)
next_params = self._assign(self._cur_params, self._mul(temp, self._cur_params))
return next_params
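For reference, the two update rules implemented by `construct` above, written as a plain-Python sketch (illustrative names only; the Cell expresses the same arithmetic with `P.Div`, `P.TensorAdd`, `P.Sub`, `P.Mul` and `P.Assign`):
def next_param(policy, init_param, cur_param, decay_rate):
    # 'Time' policy: cur_{t+1} = init / (init / cur_t + decay_rate)
    if policy == 'Time':
        return init_param / (init_param / cur_param + decay_rate)
    # 'Step' policy: cur_{t+1} = (1 - decay_rate) * cur_t
    return (1 - decay_rate) * cur_param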
......@@ -21,9 +21,14 @@ from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common import dtype as mstype
from mindarmour.diff_privacy.mechanisms.mechanisms import MechanismsFactory
from mindarmour.utils.logger import LogUtil
from mindarmour.diff_privacy import MechanismsFactory
from mindarmour.diff_privacy.mechanisms.mechanisms import _MechanismsParamsUpdater
from mindarmour.utils._check_param import check_int_positive
LOGGER = LogUtil.get_instance()
TAG = 'DP optimizer'
_grad_scale = C.MultitypeFuncGraph("grad_scale")
_reciprocal = P.Reciprocal()
......@@ -97,7 +102,9 @@ class DPOptimizerClassFactory:
if policy == 'Adam':
cls = self._get_dp_optimizer_class(nn.Adam, self.mech, self._micro_batches, *args, **kwargs)
return cls
raise NameError("The {} is not implement, please choose ['SGD', 'Momentum', 'Adam']".format(policy))
msg = "The {} is not implement, please choose ['SGD', 'Momentum', 'Adam']".format(policy)
LOGGER.error(TAG, msg)
raise NameError(msg)
def _get_dp_optimizer_class(self, cls, mech, micro_batches):
"""
......@@ -119,6 +126,14 @@ class DPOptimizerClassFactory:
self._hyper_map = C.HyperMap()
self._micro_float = Tensor(micro_batches, mstype.float32)
self._mech_param_updater = None
if self._mech is not None and self._mech._decay_policy is not None:
self._mech_param_updater = _MechanismsParamsUpdater(policy=self._mech._decay_policy,
decay_rate=self._mech._noise_decay_rate,
cur_params=self._mech._noise_multiplier,
init_params=self._mech._initial_noise_multiplier)
def construct(self, gradients):
"""
construct a compute flow.
......@@ -126,6 +141,10 @@ class DPOptimizerClassFactory:
grad_noise = self._hyper_map(self._mech, gradients)
grads = self._tuple_add(gradients, grad_noise)
grads = self._hyper_map(F.partial(_grad_scale, self._micro_float), grads)
# update mech parameters
if self._mech_param_updater is not None:
multiplier = self._mech_param_updater()
grads = F.depend(grads, multiplier)
gradients = super(DPOptimizer, self).construct(grads)
return gradients
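In effect, each optimizer step receives the micro-batch gradient sum plus mechanism noise, averaged over the number of micro-batches (assuming `_grad_scale` multiplies by the reciprocal of `self._micro_float`, as the `_reciprocal` helper above suggests). A rough plain-Python equivalent:
def dp_optimizer_grads(grad_sum, noise, micro_batches):
    # grad_sum: gradients accumulated over `micro_batches` micro-batches
    # noise:    per-tensor mechanism noise (e.g. GaussianRandom / AdaGaussian)
    return [(g + n) / micro_batches for g, n in zip(grad_sum, noise)]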
......
......@@ -47,10 +47,15 @@ from mindspore.nn.wrap.loss_scale import _grad_overflow
from mindspore.nn import Cell
from mindspore import ParameterTuple
from mindarmour.utils.logger import LogUtil
from mindarmour.diff_privacy.mechanisms.mechanisms import _MechanismsParamsUpdater
from mindarmour.utils._check_param import check_param_type
from mindarmour.utils._check_param import check_value_positive
from mindarmour.utils._check_param import check_int_positive
LOGGER = LogUtil.get_instance()
TAG = 'DP model'
GRADIENT_CLIP_TYPE = 1
_grad_scale = C.MultitypeFuncGraph("grad_scale")
_reciprocal = P.Reciprocal()
......@@ -105,13 +110,19 @@ class DPModel(Model):
norm_clip = check_param_type('norm_clip', norm_clip, float)
self._norm_clip = check_value_positive('norm_clip', norm_clip)
if mech is not None and "DPOptimizer" in kwargs['optimizer'].__class__.__name__:
raise ValueError('DPOptimizer is not supported while mech is not None')
msg = 'DPOptimizer is not supported while mech is not None'
LOGGER.error(TAG, msg)
raise ValueError(msg)
if mech is None:
if "DPOptimizer" in kwargs['optimizer'].__class__.__name__:
if context.get_context('mode') != context.PYNATIVE_MODE:
raise ValueError('DPOptimizer just support pynative mode currently.')
msg = 'DPOptimizer only supports PYNATIVE mode currently.'
LOGGER.error(TAG, msg)
raise ValueError(msg)
else:
raise ValueError('DPModel should set mech or DPOptimizer configure, please refer to example.')
msg = 'DPModel requires either mech or a DPOptimizer to be configured, please refer to the example.'
LOGGER.error(TAG, msg)
raise ValueError(msg)
self._mech = mech
super(DPModel, self).__init__(**kwargs)
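# Note (summary of the checks above): either pass `mech` together with a regular
# MindSpore optimizer (DPModel adds the noise itself), or pass `mech=None` with a
# DPOptimizer, which currently requires PYNATIVE_MODE; passing both raises ValueError.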
......@@ -163,10 +174,11 @@ class DPModel(Model):
if update_cell is not None:
# CPU does not support `TrainOneStepWithLossScaleCell` because of control flow.
if not context.get_context("enable_ge") and context.get_context("device_target") == "CPU":
raise ValueError("Only `loss_scale_manager=None` and "
"`loss_scale_manager=FixedLossScaleManager(drop_overflow_update=False)`"
"are supported in current version. If you use `O2` option, please"
"use `loss_scale_manager=None` or `FixedLossScaleManager`")
msg = "Only `loss_scale_manager=None` and `loss_scale_manager=FixedLossScaleManager(drop_overflow" \
"_update=False)` are supported in current version. If you use `O2` option, please use " \
"`loss_scale_manager=None` or `FixedLossScaleManager`"
LOGGER.error(TAG, msg)
raise ValueError(msg)
network = _TrainOneStepWithLossScaleCell(network,
optimizer,
scale_update_cell=update_cell,
......@@ -174,6 +186,7 @@ class DPModel(Model):
norm_clip=self._norm_clip,
mech=self._mech).set_train()
return network
network = _TrainOneStepCell(network,
optimizer,
loss_scale,
......@@ -182,47 +195,48 @@ class DPModel(Model):
mech=self._mech).set_train()
return network
def _build_train_network(self):
"""Build train network"""
network = self._network
if self._micro_batches:
if self._optimizer:
if self._loss_scale_manager_set:
network = self._amp_build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
loss_scale_manager=self._loss_scale_manager,
keep_batchnorm_fp32=self._keep_bn_fp32)
else:
network = self._amp_build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
keep_batchnorm_fp32=self._keep_bn_fp32)
elif self._loss_fn:
network = nn.WithLossCell(network, self._loss_fn)
else:
if self._optimizer:
if self._loss_scale_manager_set:
network = amp.build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
loss_scale_manager=self._loss_scale_manager,
keep_batchnorm_fp32=self._keep_bn_fp32)
else:
network = amp.build_train_network(network,
self._optimizer,
self._loss_fn,
level=self._amp_level,
keep_batchnorm_fp32=self._keep_bn_fp32)
elif self._loss_fn:
network = nn.WithLossCell(network, self._loss_fn)
if self._parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL, ParallelMode.AUTO_PARALLEL):
network.set_auto_parallel()
return network
class _ClipGradients(nn.Cell):
......@@ -358,6 +372,13 @@ class _TrainOneStepWithLossScaleCell(Cell):
self._hyper_map = C.HyperMap()
self._micro_float = Tensor(micro_batches, mstype.float32)
self._mech_param_updater = None
if self._mech is not None and self._mech._decay_policy is not None:
self._mech_param_updater = _MechanismsParamsUpdater(policy=self._mech._decay_policy,
decay_rate=self._mech._noise_decay_rate,
cur_params=self._mech._noise_multiplier,
init_params=self._mech._initial_noise_multiplier)
def construct(self, data, label, sens=None):
"""
construct a compute flow.
......@@ -380,14 +401,14 @@ class _TrainOneStepWithLossScaleCell(Cell):
record_labels = self._split(label)
# first index
loss = self.network(record_datas[0], record_labels[0])
scaling_sens_filled = C.ones_like(loss)*F.cast(scaling_sens, F.dtype(loss))
scaling_sens_filled = C.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss))
record_grad = self.grad(self.network, weights)(record_datas[0], record_labels[0], scaling_sens_filled)
record_grad = self._clip_by_global_norm(record_grad, GRADIENT_CLIP_TYPE, self._l2_norm)
grads = record_grad
total_loss = loss
for i in range(1, self._micro_batches):
loss = self.network(record_datas[i], record_labels[i])
scaling_sens_filled = C.ones_like(loss)*F.cast(scaling_sens, F.dtype(loss))
scaling_sens_filled = C.ones_like(loss) * F.cast(scaling_sens, F.dtype(loss))
record_grad = self.grad(self.network, weights)(record_datas[i], record_labels[i], scaling_sens_filled)
record_grad = self._clip_by_global_norm(record_grad, GRADIENT_CLIP_TYPE, self._l2_norm)
grads = self._tuple_add(grads, record_grad)
......@@ -398,6 +419,10 @@ class _TrainOneStepWithLossScaleCell(Cell):
grad_noise = self._hyper_map(self._mech, grads)
grads = self._tuple_add(grads, grad_noise)
grads = self._hyper_map(F.partial(_grad_scale, self._micro_float), grads)
# update mech parameters
if self._mech_param_updater is not None:
multiplier = self._mech_param_updater()
loss = F.depend(loss, multiplier)
grads = self.hyper_map(F.partial(_grad_scale, scaling_sens), grads)
# apply grad reducer on grads
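For orientation, a self-contained numpy sketch of the per-step flow implemented above, with loss scaling and the distributed grad reducer omitted (illustrative helper, not part of the library):
import numpy as np

def dp_step(per_micro_batch_grads, l2_norm, noise_multiplier, norm_bound):
    """Clip each micro-batch gradient by global norm, sum them, add Gaussian
    noise with stddev noise_multiplier * norm_bound, and average."""
    clipped = []
    for g in per_micro_batch_grads:               # one flattened gradient per micro-batch
        scale = min(1.0, l2_norm / max(np.linalg.norm(g), 1e-12))
        clipped.append(g * scale)
    grad_sum = np.sum(clipped, axis=0)
    noise = np.random.normal(0.0, noise_multiplier * norm_bound, size=grad_sum.shape)
    return (grad_sum + noise) / len(per_micro_batch_grads)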
......@@ -474,6 +499,10 @@ class _TrainOneStepCell(Cell):
self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)
# dp params
if micro_batches is None:
msg = 'micro_batches must be given for differential privacy, but got value: {}'.format(micro_batches)
LOGGER.error(TAG, msg)
raise ValueError(msg)
self._micro_batches = micro_batches
norm_clip = check_param_type('norm_clip', norm_clip, float)
self._l2_norm = check_value_positive('norm_clip', norm_clip)
......@@ -484,6 +513,13 @@ class _TrainOneStepCell(Cell):
self._hyper_map = C.HyperMap()
self._micro_float = Tensor(micro_batches, mstype.float32)
self._mech_param_updater = None
if self._mech is not None and self._mech._decay_policy is not None:
self._mech_param_updater = _MechanismsParamsUpdater(policy=self._mech._decay_policy,
decay_rate=self._mech._noise_decay_rate,
cur_params=self._mech._noise_multiplier,
init_params=self._mech._initial_noise_multiplier)
def construct(self, data, label):
"""
construct a compute flow.
......@@ -510,6 +546,10 @@ class _TrainOneStepCell(Cell):
grad_noise = self._hyper_map(self._mech, grads)
grads = self._tuple_add(grads, grad_noise)
grads = self._hyper_map(F.partial(_grad_scale, self._micro_float), grads)
# update mech parameters
if self._mech_param_updater is not None:
multiplier = self._mech_param_updater()
loss = F.depend(loss, multiplier)
if self.reducer_flag:
# apply grad reducer on grads
......
......@@ -41,7 +41,7 @@ def dataset_generator(batch_size, batches):
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_card
@pytest.mark.component_mindarmour
def test_dp_model_pynative_mode():
def test_dp_model_with_pynative_mode():
context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
norm_clip = 1.0
initial_noise_multiplier = 0.01
......@@ -96,3 +96,33 @@ def test_dp_model_with_graph_mode():
ms_ds = ds.GeneratorDataset(dataset_generator(batch_size, batches), ['data', 'label'])
ms_ds.set_dataset_size(batch_size * batches)
model.train(epochs, ms_ds, dataset_sink_mode=False)
@pytest.mark.level0
@pytest.mark.platform_arm_ascend_training
@pytest.mark.platform_x86_ascend_training
@pytest.mark.env_card
@pytest.mark.component_mindarmour
def test_dp_model_with_graph_mode_ada_gaussian():
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
norm_clip = 1.0
initial_noise_multiplier = 0.01
network = LeNet5()
batch_size = 32
batches = 128
epochs = 1
loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
mech = MechanismsFactory().create('AdaGaussian',
norm_bound=norm_clip,
initial_noise_multiplier=initial_noise_multiplier)
net_opt = nn.Momentum(network.trainable_params(), learning_rate=0.1, momentum=0.9)
model = DPModel(micro_batches=2,
norm_clip=norm_clip,
mech=mech,
network=network,
loss_fn=loss,
optimizer=net_opt,
metrics=None)
ms_ds = ds.GeneratorDataset(dataset_generator(batch_size, batches), ['data', 'label'])
ms_ds.set_dataset_size(batch_size * batches)
model.train(epochs, ms_ds, dataset_sink_mode=False)