123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187 |
- import torch
- import opacus
- from typing import List, Union
- import os
def generate_noise(max_norm, parameter, noise_multiplier, noise_type, device):
    """
    Draws a noise tensor shaped like ``parameter`` from a selectable distribution.

    The distribution scale is ``noise_multiplier * max_norm`` for every element.
    @param max_norm
        Clipping bound; multiplied with ``noise_multiplier`` to obtain the scale.
    @param parameter
        Object whose ``shape`` attribute determines the shape of the noise tensor.
    @param noise_multiplier
        Ratio of the noise scale to ``max_norm``. Noise is only sampled when
        this value is greater than zero.
    @param noise_type
        Name of the distribution, matched case-insensitively:
        "normal" / "gauss" / "gaussian" -> Normal(0, scale),
        "laplace" / "laplacian" -> Laplace(0, scale),
        "exponential" -> Exponential(rate = 1 / scale).
        Any other string falls back to the Normal distribution.
    @param device
        The device on which the scale tensor (and hence the noise) is created.
    @return
        a float32 tensor of noise in the same shape as ``parameter``, or the
        float ``0.0`` when ``noise_multiplier`` is not greater than zero.
    """
    # Written as "not > 0" (instead of "<= 0") so a NaN multiplier also yields 0.0.
    if not noise_multiplier > 0:
        return 0.0

    # One scale entry per element so that a single sample() call fills the whole shape.
    spread = torch.full(
        size=parameter.shape,
        fill_value=noise_multiplier * max_norm,
        dtype=torch.float32,
        device=device,
    )

    kind = noise_type.lower()
    if kind in ("laplace", "laplacian"):
        sampler = torch.distributions.laplace.Laplace(0, spread)
    elif kind == "exponential":
        sampler = torch.distributions.exponential.Exponential(1 / spread)
    else:
        # Covers "normal", "gauss", "gaussian" and every unrecognised name.
        sampler = torch.distributions.normal.Normal(0, spread)
    return sampler.sample()
def apply_noise(weights, batch_size, max_norm, noise_multiplier, noise_type, device, loss_reduction="mean", clipping=False):
    """
    Adds noise in place to weights on the (intermediate) server side, using generate_noise.

    Each tensor is optionally clipped to ``max_norm`` (L2 norm over the whole tensor)
    and then receives noise drawn by generate_noise.
    @param weights
        An iterable of tensors, or a dict whose values are the tensors (its keys are
        ignored). The tensors are modified in place. Tensors that are neither
        floating point nor complex (e.g. integer counters or boolean masks) are
        left untouched, because float noise cannot be added to them in place.
    @param batch_size
        Batch size used for averaging: the noise is divided by it when
        ``loss_reduction`` is "mean".
    @param max_norm
        The clipping bound, also passed to generate_noise as the noise
        sensitivity. ``None`` is treated as 1.0.
    @param noise_multiplier
        The ratio of the noise scale to ``max_norm``.
    @param noise_type
        Sets the distribution for the noise generation.
        See generate_noise for supported strings.
    @param device
        The device used for calculations and needed for tensor definition.
    @param loss_reduction
        The method of loss reduction. Only "mean" has an effect (noise is
        divided by ``batch_size``); any other value leaves the noise unscaled.
    @param clipping
        If True, each tensor whose L2 norm exceeds ``max_norm`` is scaled down
        to that norm before the noise is added.
    @return
        None; ``weights`` is updated in place.
    """
    if isinstance(weights, dict):
        weights = weights.values()
    if max_norm is None:
        max_norm = 1.0

    # In-place updates of leaf tensors that require grad are rejected by autograd
    # unless gradient tracking is disabled.
    with torch.no_grad():
        for p in weights:
            # State dicts may hold integer/bool buffers; clipping or adding float
            # noise to them in place raises a dtype error, so skip them.
            if isinstance(p, torch.Tensor) and not (p.is_floating_point() or p.is_complex()):
                continue
            if clipping:
                norm = torch.norm(p, p=2)
                # Only shrink: tensors already within max_norm are divided by 1.
                p /= max(1, norm / max_norm)
            noise = generate_noise(max_norm, p, noise_multiplier, noise_type, device)
            if loss_reduction == "mean":
                noise /= batch_size
            p += noise
class PrivacyEngineXL(opacus.PrivacyEngine):
    """
    An opacus-based privacy engine whose noise distribution is selectable.

    It gets attached to the optimizer just like the privacy engine from opacus.
    All constructor arguments except ``noise_type`` are forwarded unchanged
    (positionally, in the order listed here) to ``opacus.PrivacyEngine.__init__``.
    @param module
        The Pytorch module to which we are attaching the privacy engine
    @param batch_size
        Training batch size. Used in the privacy accountant.
    @param sample_size
        The size of the sample (dataset). Used in the privacy accountant.
    @param alphas
        A list of RDP orders
    @param noise_multiplier
        The ratio of the standard deviation of the noise to
        the L2-sensitivity of the function to which the noise is added
    @param max_grad_norm
        The maximum norm of the per-sample gradients. Any gradient with norm
        higher than this will be clipped to this value.
    @param secure_rng
        Forwarded to opacus. The noise distributions of this class do not
        implement secure random number generation, so a warning is emitted
        when this flag is on.
    @param grad_norm_type
        The order of the norm. For instance, 2 represents L-2 norm, while
        1 represents L-1 norm.
    @param batch_first
        Flag to indicate if the input tensor to the corresponding module
        has the first dimension representing the batch. If set to True,
        dimensions on input tensor will be ``[batch_size, ..., ...]``.
    @param target_delta
        The target delta
    @param loss_reduction
        Indicates if the loss reduction (for aggregating the gradients)
        is a sum or a mean operation. Can take values "sum" or "mean"
    @param noise_type
        Sets the distribution for the noise generation; stored as
        ``self.noise_type``. See generate_noise for supported strings.
    @param **misc_settings
        Other keyword arguments, passed through to the opacus constructor
    """

    def __init__(
        self,
        module: torch.nn.Module,
        batch_size: int,
        sample_size: int,
        alphas: List[float],
        noise_multiplier: float,
        max_grad_norm: Union[float, List[float]],
        secure_rng: bool = False,
        grad_norm_type: int = 2,
        batch_first: bool = True,
        target_delta: float = 1e-6,
        loss_reduction: str = "mean",
        noise_type: str = "gaussian",
        **misc_settings
    ):
        if secure_rng:
            import warnings

            warnings.warn(
                "Secure RNG was turned on. However it is not yet implemented for the noise distributions of privacy_engine_xl."
            )

        # Positional order must match the opacus.PrivacyEngine signature this file
        # was written against -- NOTE(review): confirm against the installed version.
        base_args = (
            module,
            batch_size,
            sample_size,
            alphas,
            noise_multiplier,
            max_grad_norm,
            secure_rng,
            grad_norm_type,
            batch_first,
            target_delta,
            loss_reduction,
        )
        opacus.PrivacyEngine.__init__(self, *base_args, **misc_settings)

        # Assigned after the base initialiser has run.
        self.noise_type = noise_type

    def _generate_noise(self, max_norm, parameter):
        """
        Generates a tensor of noise in the same shape as ``parameter``.

        Delegates to generate_noise using this engine's ``noise_multiplier``,
        ``noise_type`` and ``device`` attributes (``noise_multiplier`` and
        ``device`` are not assigned in this class; presumably the opacus base
        class sets them -- confirm).
        @param max_norm
            The maximum norm of the per-sample gradients. Any gradient with norm
            higher than this will be clipped to this value.
        @param parameter
            The parameter, based on which the dimension of the noise tensor
            will be determined
        @return
            whatever generate_noise returns for these settings.
        """
        return generate_noise(
            max_norm=max_norm,
            parameter=parameter,
            noise_multiplier=self.noise_multiplier,
            noise_type=self.noise_type,
            device=self.device,
        )
|