# privacy_engine_xl.py
import os
import warnings
from typing import List, Union

import opacus
import torch
  5. def generate_noise(max_norm, parameter, noise_multiplier, noise_type, device):
  6. """
  7. A noise generation function that can utilize different distributions for noise generation.
  8. @param max_norm
  9. The maximum norm of the per-sample gradients. Any gradient with norm
  10. higher than this will be clipped to this value.
  11. @param parameter
  12. The parameter, based on which the dimension of the noise tensor
  13. will be determined
  14. @param noise_multiplier
  15. The ratio of the standard deviation of the Gaussian noise to
  16. the L2-sensitivity of the function to which the noise is added
  17. @param noise_type
  18. Sets the distribution for the noise generation.
  19. See generate_noise for supported strings.
  20. @param device
  21. The device used for calculations and needed for tensor definition.
  22. @return
  23. a tensor of noise in the same shape as ``parameter``.
  24. """
  25. if noise_multiplier > 0:
  26. mean = 0
  27. scale_scalar = noise_multiplier * max_norm
  28. scale = torch.full(size=parameter.shape, fill_value=scale_scalar, dtype=torch.float32, device=device)
  29. if noise_type.lower() in ["normal", "gauss", "gaussian"]:
  30. dist = torch.distributions.normal.Normal(mean, scale)
  31. elif noise_type.lower() in ["laplace", "laplacian"]:
  32. dist = torch.distributions.laplace.Laplace(mean, scale)
  33. elif noise_type.lower() in ["exponential"]:
  34. rate = 1 / scale
  35. dist = torch.distributions.exponential.Exponential(rate)
  36. else:
  37. dist = torch.distributions.normal.Normal(mean, scale)
  38. noise = dist.sample()
  39. return noise
  40. return 0.0
  41. # Server side Noise
  42. def apply_noise(weights, batch_size, max_norm, noise_multiplier, noise_type, device, loss_reduction="mean"):
  43. """
  44. A function for applying noise to weights on the (intermediate) server side that utilizes the generate_noise function above.
  45. @param weights
  46. The weights to which to apply the noise.
  47. @param batch_size
  48. Batch size used for averaging.
  49. @param max_norm
  50. The maximum norm of the per-sample gradients. Any gradient with norm
  51. higher than this will be clipped to this value.
  52. @param noise_multiplier
  53. The ratio of the standard deviation of the Gaussian noise to
  54. the L2-sensitivity of the function to which the noise is added
  55. @param noise_type
  56. Sets the distribution for the noise generation.
  57. See generate_noise for supported strings.
  58. @param device
  59. The device used for calculations and needed for tensor definition.
  60. @param loss_reduction
  61. The method of loss reduction.
  62. currently supported: mean
  63. """
  64. for p in weights.values():
  65. noise = generate_noise(max_norm, p, noise_multiplier, noise_type, device)
  66. if loss_reduction == "mean":
  67. noise /= batch_size
  68. p += noise
  69. # Client side Noise
  70. class PrivacyEngineXL(opacus.PrivacyEngine):
  71. """
  72. A privacy engine that can utilize different distributions for noise generation, based on opacus' privacy engine.
  73. It gets attached to the optimizer just like the privacy engine from opacus.
  74. @param module:
  75. The Pytorch module to which we are attaching the privacy engine
  76. @param batch_size
  77. Training batch size. Used in the privacy accountant.
  78. @param sample_size
  79. The size of the sample (dataset). Used in the privacy accountant.
  80. @param alphas
  81. A list of RDP orders
  82. @param noise_multiplier
  83. The ratio of the standard deviation of the Gaussian noise to
  84. the L2-sensitivity of the function to which the noise is added
  85. @param max_grad_norm
  86. The maximum norm of the per-sample gradients. Any gradient with norm
  87. higher than this will be clipped to this value.
  88. @param secure_rng
  89. If on, it will use ``torchcsprng`` for secure random number generation. Comes with
  90. a significant performance cost, therefore it's recommended that you turn it off when
  91. just experimenting.
  92. @param grad_norm_type
  93. The order of the norm. For instance, 2 represents L-2 norm, while
  94. 1 represents L-1 norm.
  95. @param batch_first
  96. Flag to indicate if the input tensor to the corresponding module
  97. has the first dimension representing the batch. If set to True,
  98. dimensions on input tensor will be ``[batch_size, ..., ...]``.
  99. @param target_delta
  100. The target delta
  101. @param loss_reduction
  102. Indicates if the loss reduction (for aggregating the gradients)
  103. is a sum or a mean operation. Can take values "sum" or "mean"
  104. @param noise_type
  105. Sets the distribution for the noise generation.
  106. See generate_noise for supported strings.
  107. @param **misc_settings
  108. Other arguments to the init
  109. """
  110. def __init__(
  111. self,
  112. module: torch.nn.Module,
  113. batch_size: int,
  114. sample_size: int,
  115. alphas: List[float],
  116. noise_multiplier: float,
  117. max_grad_norm: Union[float, List[float]],
  118. secure_rng: bool = False,
  119. grad_norm_type: int = 2,
  120. batch_first: bool = True,
  121. target_delta: float = 1e-6,
  122. loss_reduction: str = "mean",
  123. noise_type: str="gaussian",
  124. **misc_settings
  125. ):
  126. import warnings
  127. if secure_rng:
  128. warnings.warn(
  129. "Secure RNG was turned on. However it is not yet implemented for the noise distributions of privacy_engine_xl."
  130. )
  131. opacus.PrivacyEngine.__init__(
  132. self,
  133. module,
  134. batch_size,
  135. sample_size,
  136. alphas,
  137. noise_multiplier,
  138. max_grad_norm,
  139. secure_rng,
  140. grad_norm_type,
  141. batch_first,
  142. target_delta,
  143. loss_reduction,
  144. **misc_settings)
  145. self.noise_type = noise_type
  146. def _generate_noise(self, max_norm, parameter):
  147. """
  148. Generates a tensor of noise in the same shape as ``parameter``.
  149. @param max_norm
  150. The maximum norm of the per-sample gradients. Any gradient with norm
  151. higher than this will be clipped to this value.
  152. @param parameter
  153. The parameter, based on which the dimension of the noise tensor
  154. will be determined
  155. @return
  156. a tensor of noise in the same shape as ``parameter``.
  157. """
  158. return generate_noise(max_norm, parameter, self.noise_multiplier, self.noise_type, self.device)