privacy_engine_xl.py

import torch
import opacus
from typing import List, Union
import os
import warnings


def generate_noise(max_norm, parameter, noise_multiplier, noise_type, device):
    """
    A noise generation function that can utilize different distributions for noise generation.

    @param max_norm
        The maximum norm of the per-sample gradients. Any gradient with norm
        higher than this will be clipped to this value.
    @param parameter
        The parameter, based on which the dimension of the noise tensor
        will be determined
    @param noise_multiplier
        The ratio of the standard deviation of the Gaussian noise to
        the L2-sensitivity of the function to which the noise is added
    @param noise_type
        Sets the distribution for the noise generation.
        Supported strings: "normal"/"gauss"/"gaussian", "laplace"/"laplacian"
        and "exponential"; any other value falls back to Gaussian noise.
    @param device
        The device used for calculations and needed for tensor definition.
    @return
        a tensor of noise in the same shape as ``parameter``.
    """
    if noise_multiplier > 0:
        mean = 0
        scale_scalar = noise_multiplier * max_norm
        scale = torch.full(size=parameter.shape, fill_value=scale_scalar,
                           dtype=torch.float32, device=device)

        if noise_type.lower() in ["normal", "gauss", "gaussian"]:
            dist = torch.distributions.normal.Normal(mean, scale)
        elif noise_type.lower() in ["laplace", "laplacian"]:
            dist = torch.distributions.laplace.Laplace(mean, scale)
        elif noise_type.lower() in ["exponential"]:
            rate = 1 / scale
            dist = torch.distributions.exponential.Exponential(rate)
        else:
            dist = torch.distributions.normal.Normal(mean, scale)

        noise = dist.sample()
        return noise
    return 0.0
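
# Usage sketch (illustrative, not part of the original module): drawing Laplace
# noise shaped like a weight tensor. The clipping bound of 1.0 and the noise
# multiplier of 1.1 are assumed example values only.
#
#   weight = torch.zeros(3, 4)
#   noise = generate_noise(max_norm=1.0, parameter=weight, noise_multiplier=1.1,
#                          noise_type="laplace", device=torch.device("cpu"))
#   assert noise.shape == weight.shape
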

# Server side Noise
def apply_noise(weights, batch_size, noise_multiplier, noise_type, device, loss_reduction="mean"):
    """
    A function for applying noise to weights on the (intermediate) server side
    that utilizes the generate_noise function above.

    @param weights
        The weights to which to apply the noise.
    @param batch_size
        Batch size used for averaging.
    @param noise_multiplier
        The ratio of the standard deviation of the Gaussian noise to
        the L2-sensitivity of the function to which the noise is added
    @param noise_type
        Sets the distribution for the noise generation.
        See generate_noise for supported strings.
    @param device
        The device used for calculations and needed for tensor definition.
    @param loss_reduction
        The method of loss reduction.
        Currently supported: "mean"
    """
    for p in weights.values():
        noise = generate_noise(0, p, noise_multiplier, noise_type, device)
        if loss_reduction == "mean":
            noise /= batch_size
        p += noise
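
# Intended call pattern per the docstring above (illustrative, not part of the
# original module): perturbing a model's weights in place on the server side.
# The batch size and noise multiplier are assumed example values; torch.no_grad()
# avoids autograd errors from the in-place update of parameter tensors.
#
#   model = torch.nn.Linear(10, 2)
#   with torch.no_grad():
#       apply_noise(model.state_dict(), batch_size=32, noise_multiplier=1.1,
#                   noise_type="gaussian", device=torch.device("cpu"))
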

# Client side Noise
class PrivacyEngineXL(opacus.PrivacyEngine):
    """
    A privacy engine that can utilize different distributions for noise
    generation, based on opacus' privacy engine. It gets attached to the
    optimizer just like the privacy engine from opacus.

    @param module
        The PyTorch module to which we are attaching the privacy engine
    @param batch_size
        Training batch size. Used in the privacy accountant.
    @param sample_size
        The size of the sample (dataset). Used in the privacy accountant.
    @param alphas
        A list of RDP orders
    @param noise_multiplier
        The ratio of the standard deviation of the Gaussian noise to
        the L2-sensitivity of the function to which the noise is added
    @param max_grad_norm
        The maximum norm of the per-sample gradients. Any gradient with norm
        higher than this will be clipped to this value.
    @param secure_rng
        If on, it will use ``torchcsprng`` for secure random number generation.
        Comes with a significant performance cost, therefore it's recommended
        that you turn it off when just experimenting.
    @param grad_norm_type
        The order of the norm. For instance, 2 represents L-2 norm, while
        1 represents L-1 norm.
    @param batch_first
        Flag to indicate if the input tensor to the corresponding module
        has the first dimension representing the batch. If set to True,
        dimensions on input tensor will be ``[batch_size, ..., ...]``.
    @param target_delta
        The target delta
    @param loss_reduction
        Indicates if the loss reduction (for aggregating the gradients)
        is a sum or a mean operation. Can take values "sum" or "mean"
    @param noise_type
        Sets the distribution for the noise generation.
        See generate_noise for supported strings.
    @param **misc_settings
        Other arguments to the init
    """

    def __init__(
        self,
        module: torch.nn.Module,
        batch_size: int,
        sample_size: int,
        alphas: List[float],
        noise_multiplier: float,
        max_grad_norm: Union[float, List[float]],
        secure_rng: bool = False,
        grad_norm_type: int = 2,
        batch_first: bool = True,
        target_delta: float = 1e-6,
        loss_reduction: str = "mean",
        noise_type: str = "gaussian",
        **misc_settings
    ):
        if secure_rng:
            warnings.warn(
                "Secure RNG was turned on. However, it is not yet implemented "
                "for the noise distributions of privacy_engine_xl."
            )

        opacus.PrivacyEngine.__init__(
            self,
            module,
            batch_size,
            sample_size,
            alphas,
            noise_multiplier,
            max_grad_norm,
            secure_rng,
            grad_norm_type,
            batch_first,
            target_delta,
            loss_reduction,
            **misc_settings)

        self.noise_type = noise_type

    def _generate_noise(self, max_norm, parameter):
        """
        Generates a tensor of noise in the same shape as ``parameter``.

        @param max_norm
            The maximum norm of the per-sample gradients. Any gradient with norm
            higher than this will be clipped to this value.
        @param parameter
            The parameter, based on which the dimension of the noise tensor
            will be determined
        @return
            a tensor of noise in the same shape as ``parameter``.
        """
        return generate_noise(max_norm, parameter, self.noise_multiplier, self.noise_type, self.device)
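

if __name__ == "__main__":
    # Minimal usage sketch (illustrative, not part of the original module):
    # attaching PrivacyEngineXL to an optimizer. This assumes an opacus 0.x
    # style API in which the engine is attached via ``engine.attach(optimizer)``,
    # matching the constructor signature above; the toy model, alphas and
    # hyperparameters below are assumed example values only.
    model = torch.nn.Linear(10, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    privacy_engine = PrivacyEngineXL(
        model,
        batch_size=32,
        sample_size=1000,
        alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        noise_multiplier=1.1,
        max_grad_norm=1.0,
        noise_type="laplace",
    )
    privacy_engine.attach(optimizer)

    # Once attached, each optimizer.step() clips per-sample gradients and adds
    # noise drawn through _generate_noise, i.e. via generate_noise above.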