privacy_engine_xl.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. import torch
  2. import opacus
  3. from typing import List, Union
  4. import os
  def generate_noise(max_norm, parameter, noise_multiplier, noise_type, device):
      """
      Sample a noise tensor shaped like ``parameter`` from a selectable distribution.

      @param max_norm
          The clipping bound. Multiplied by ``noise_multiplier`` it gives the
          scale of the noise distribution.
      @param parameter
          The tensor whose ``shape`` determines the shape of the noise tensor.
      @param noise_multiplier
          The ratio of the noise scale to ``max_norm``. Noise is only generated
          when this is greater than 0.
      @param noise_type
          Case-insensitive name of the distribution to sample from:
            - "normal", "gauss", "gaussian": Normal(0, scale)
            - "laplace", "laplacian":        Laplace(0, scale)
            - "exponential":                 Exponential(rate = 1 / scale),
                                             i.e. one-sided, non-negative noise
          Any other string falls back to the Gaussian distribution and emits a
          ``UserWarning`` so that a misspelled name does not go unnoticed.
      @param device
          The device on which the noise tensor is created.
      @return
          A float32 tensor of noise in the same shape as ``parameter``, or the
          float ``0.0`` when ``noise_multiplier`` is not greater than 0.
      """
      # Local import: keeps the function usable without touching file-level imports.
      import warnings

      # Written as "not > 0" (rather than "<= 0") so NaN also disables the noise,
      # exactly like the original "if noise_multiplier > 0" guard did.
      if not noise_multiplier > 0:
          return 0.0

      gaussian_names = ("normal", "gauss", "gaussian")
      laplace_names = ("laplace", "laplacian")
      exponential_names = ("exponential",)

      kind = noise_type.lower()

      # One scale entry per element, so a single sample() call yields the full tensor.
      scale = torch.full(
          size=parameter.shape,
          fill_value=noise_multiplier * max_norm,
          dtype=torch.float32,
          device=device,
      )

      if kind in laplace_names:
          dist = torch.distributions.laplace.Laplace(0, scale)
      elif kind in exponential_names:
          # The exponential distribution is parameterised by its rate (1 / scale).
          dist = torch.distributions.exponential.Exponential(1 / scale)
      else:
          if kind not in gaussian_names:
              # Keep the historical Gaussian fallback, but make it visible.
              warnings.warn(
                  f"Unknown noise_type {noise_type!r}; falling back to Gaussian noise.",
                  stacklevel=2,
              )
          dist = torch.distributions.normal.Normal(0, scale)

      return dist.sample()
  41. # Server side Noise
  def apply_noise(weights, batch_size, max_norm, noise_multiplier, noise_type, device, loss_reduction="mean", clipping=False):
      """
      Add noise in place to weights on the (intermediate) server side, using generate_noise.

      @param weights
          The tensors to which the noise is applied: either an iterable of
          tensors or a dict whose values are tensors (only the values are used).
          Every tensor is modified in place. Tensors that are neither floating
          point nor complex (e.g. integer counters stored in a state dict) are
          left untouched, because the in-place float update cannot be applied
          to them.
      @param batch_size
          Batch size; the noise is divided by it when ``loss_reduction`` is "mean".
      @param max_norm
          The clipping bound, also used to scale the noise. ``None`` is treated as 1.0.
      @param noise_multiplier
          The ratio of the noise scale to ``max_norm``.
      @param noise_type
          Sets the distribution for the noise generation.
          See generate_noise for supported strings.
      @param device
          The device used for calculations and needed for tensor definition.
      @param loss_reduction
          The method of loss reduction. Currently supported: "mean" (the noise is
          divided by ``batch_size``). Any other value leaves the noise unscaled.
      @param clipping
          If True, each tensor is first divided by ``max(1, ||p||_2 / max_norm)``
          so that its L2 norm does not exceed ``max_norm``.
      @return
          None. The tensors in ``weights`` are updated in place.
      """
      if isinstance(weights, dict):
          weights = weights.values()
      if max_norm is None:
          max_norm = 1.0

      # The updates below are in-place; without no_grad() autograd rejects them
      # for leaf tensors that require grad (e.g. model.parameters()).
      with torch.no_grad():
          for p in weights:
              # Integer/bool tensors cannot take an in-place float update.
              if not (p.is_floating_point() or p.is_complex()):
                  continue
              if clipping:
                  norm = torch.norm(p, p=2)
                  # Only ever shrink: the divisor is at least 1.
                  div_norm = max(1, norm / max_norm)
                  p /= div_norm
              noise = generate_noise(max_norm, p, noise_multiplier, noise_type, device)
              if loss_reduction == "mean":
                  noise /= batch_size
              p += noise
  77. # Client side Noise
  class PrivacyEngineXL(opacus.PrivacyEngine):
      """
      An opacus privacy engine whose noise distribution is selectable.

      It is attached to the optimizer in the same way as the privacy engine from
      opacus; the only difference is that the noise is drawn via generate_noise
      using the configured ``noise_type``.

      @param module
          The PyTorch module to which the privacy engine is attached.
      @param batch_size
          Training batch size. Used in the privacy accountant.
      @param sample_size
          The size of the sample (dataset). Used in the privacy accountant.
      @param alphas
          A list of RDP orders.
      @param noise_multiplier
          The ratio of the standard deviation of the Gaussian noise to
          the L2-sensitivity of the function to which the noise is added.
      @param max_grad_norm
          The maximum norm of the per-sample gradients. Any gradient with a norm
          higher than this will be clipped to this value.
      @param secure_rng
          Forwarded to opacus. A warning is issued when it is on, because secure
          random number generation is not implemented for the noise
          distributions of this engine.
      @param grad_norm_type
          The order of the norm. For instance, 2 represents the L-2 norm, while
          1 represents the L-1 norm.
      @param batch_first
          Flag to indicate if the input tensor to the corresponding module
          has the first dimension representing the batch. If set to True, the
          dimensions of the input tensor are ``[batch_size, ..., ...]``.
      @param target_delta
          The target delta.
      @param loss_reduction
          Indicates if the loss reduction (for aggregating the gradients)
          is a sum or a mean operation. Can take the values "sum" or "mean".
      @param noise_type
          Sets the distribution for the noise generation.
          See generate_noise for supported strings.
      @param **misc_settings
          Other arguments, forwarded to the opacus initializer.
      """

      def __init__(
          self,
          module: torch.nn.Module,
          batch_size: int,
          sample_size: int,
          alphas: List[float],
          noise_multiplier: float,
          max_grad_norm: Union[float, List[float]],
          secure_rng: bool = False,
          grad_norm_type: int = 2,
          batch_first: bool = True,
          target_delta: float = 1e-6,
          loss_reduction: str = "mean",
          noise_type: str = "gaussian",
          **misc_settings
      ):
          import warnings

          if secure_rng:
              warnings.warn(
                  "Secure RNG was turned on. However it is not yet implemented for the noise distributions of privacy_engine_xl."
              )

          # Forwarded positionally, in the order the base initializer is called with.
          base_args = (
              module,
              batch_size,
              sample_size,
              alphas,
              noise_multiplier,
              max_grad_norm,
              secure_rng,
              grad_norm_type,
              batch_first,
              target_delta,
              loss_reduction,
          )
          opacus.PrivacyEngine.__init__(self, *base_args, **misc_settings)

          # Stored after the base initializer has run; read by _generate_noise.
          self.noise_type = noise_type

      def _generate_noise(self, max_norm, parameter):
          """
          Generate a tensor of noise in the same shape as ``parameter``.

          Delegates to generate_noise with this engine's ``noise_multiplier``,
          ``noise_type`` and ``device``.

          @param max_norm
              The maximum norm of the per-sample gradients. Any gradient with a
              norm higher than this will be clipped to this value.
          @param parameter
              The parameter, based on which the dimension of the noise tensor
              will be determined.
          @return
              Whatever generate_noise returns for these arguments.
          """
          multiplier = self.noise_multiplier
          distribution = self.noise_type
          target_device = self.device
          return generate_noise(max_norm, parameter, multiplier, distribution, target_device)