# privacy_engine_xl.py

import torch
import opacus
from opacus.per_sample_gradient_clip import PerSampleGradientClipper
from opacus.utils import clipping
from typing import List, Union
import warnings

def generate_noise(max_norm, parameter, noise_multiplier, noise_type, device):
    """
    A noise generation function that can utilize different distributions for
    noise generation.

    @param max_norm
        The maximum norm of the per-sample gradients. Any gradient with norm
        higher than this will be clipped to this value.
    @param parameter
        The parameter based on whose shape the noise tensor is created.
    @param noise_multiplier
        The ratio of the standard deviation of the Gaussian noise to the
        L2-sensitivity of the function to which the noise is added.
    @param noise_type
        Sets the distribution for the noise generation. Supported strings:
        "normal"/"gauss"/"gaussian", "laplace"/"laplacian" and "exponential".
        Any other value falls back to Gaussian noise.
    @param device
        The device used for calculations and needed for tensor definition.
    @return
        A tensor of noise in the same shape as ``parameter``, or 0.0 if
        ``noise_multiplier`` is not positive.
    """
    if noise_multiplier > 0:
        mean = 0
        scale_scalar = noise_multiplier * max_norm
        scale = torch.full(size=parameter.shape, fill_value=scale_scalar,
                           dtype=torch.float32, device=device)
        if noise_type.lower() in ["normal", "gauss", "gaussian"]:
            dist = torch.distributions.normal.Normal(mean, scale)
        elif noise_type.lower() in ["laplace", "laplacian"]:
            dist = torch.distributions.laplace.Laplace(mean, scale)
        elif noise_type.lower() in ["exponential"]:
            # The exponential distribution is parameterized by its rate,
            # the inverse of the scale.
            rate = 1 / scale
            dist = torch.distributions.exponential.Exponential(rate)
        else:
            # Unknown noise_type: fall back to Gaussian noise.
            dist = torch.distributions.normal.Normal(mean, scale)
        noise = dist.sample()
        return noise
    return 0.0
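
# Example usage (illustrative sketch): draw Laplace noise shaped like a
# weight tensor. The tensor ``w`` below is a stand-in for a real parameter.
#
#     w = torch.zeros(10, 10)
#     noise = generate_noise(max_norm=1.0, parameter=w, noise_multiplier=1.1,
#                            noise_type="laplace", device="cpu")
#     assert noise.shape == w.shape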

# Server-side noise
def apply_noise(weights, batch_size, max_norm, noise_multiplier, noise_type,
                device, loss_reduction="mean"):
    """
    A function for applying noise to weights on the (intermediate) server
    side that utilizes the generate_noise function above.

    @param weights
        The weights to which to apply the noise, given as a dict or an
        iterable of tensors.
    @param batch_size
        Batch size used for averaging.
    @param max_norm
        The maximum norm of the per-sample gradients. Any gradient with norm
        higher than this will be clipped to this value.
    @param noise_multiplier
        The ratio of the standard deviation of the Gaussian noise to the
        L2-sensitivity of the function to which the noise is added.
    @param noise_type
        Sets the distribution for the noise generation.
        See generate_noise for supported strings.
    @param device
        The device used for calculations and needed for tensor definition.
    @param loss_reduction
        The method of loss reduction. Currently supported: "mean".
    """
    if isinstance(weights, dict):
        weights = weights.values()
    for p in weights:
        noise = generate_noise(max_norm, p, noise_multiplier, noise_type, device)
        if loss_reduction == "mean":
            noise /= batch_size
        # Perturb the weight tensor in place.
        p += noise
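
# Example usage (illustrative sketch): perturbing an aggregated state_dict on
# the server. Assumes the tensors were produced by averaging over
# ``batch_size`` contributions, matching loss_reduction="mean".
#
#     state = {"fc.weight": torch.zeros(4, 4), "fc.bias": torch.zeros(4)}
#     apply_noise(state, batch_size=32, max_norm=1.0, noise_multiplier=1.1,
#                 noise_type="gaussian", device="cpu")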

# Server-side clipping
def setup_clipper(model, max_grad_norm, batch_first=False, loss_reduction="mean"):
    """
    Builds a per-sample gradient clipper for ``model``: a single flat
    threshold if ``max_grad_norm`` is a float, or one threshold per layer
    if it is a list.
    """
    norm_clipper = (
        # pyre-fixme[6]: Expected `float` for 1st param but got
        #  `Union[List[float], float]`.
        clipping.ConstantFlatClipper(max_grad_norm)
        if not isinstance(max_grad_norm, list)
        # pyre-fixme[6]: Expected `List[float]` for 1st param but got
        #  `Union[List[float], float]`.
        else clipping.ConstantPerLayerClipper(max_grad_norm)
    )
    # Experimental clipping from opacus:
    # if self.misc_settings.get("experimental", False):
    #     norm_clipper = clipping._Dynamic_Clipper_(
    #         # pyre-fixme[6]: Expected `List[float]` for 1st param but got
    #         #  `List[Union[List[float], float]]`.
    #         [max_grad_norm],
    #         self.misc_settings.get("clip_per_layer", False),
    #         self.misc_settings.get(
    #             "clipping_method", clipping.ClippingMethod.STATIC
    #         ),
    #         self.misc_settings.get("clipping_ratio", 0.0),
    #         self.misc_settings.get("clipping_momentum", 0.0),
    #     )
    clipper = PerSampleGradientClipper(
        model,
        norm_clipper,
        batch_first,
        loss_reduction,
    )
    return clipper
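
# Example (illustrative sketch): a float threshold yields a single flat
# clipper, a list yields per-layer thresholds. ``model`` is a placeholder;
# note that, depending on the installed opacus version, the clipper may
# attach per-sample gradient hooks to the model, so reuse one clipper rather
# than creating several.
#
#     model = torch.nn.Linear(8, 2)
#     clipper = setup_clipper(model, max_grad_norm=1.0)        # flat
#     # or: setup_clipper(model, max_grad_norm=[1.0, 0.5])     # per-layer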

# Server-side noise and clipping
def apply_noise_clipping(model, batch_size, max_norm, noise_multiplier,
                         noise_type, device, loss_reduction="mean"):
    """
    A function for applying clipping and noise to the gradients of a model
    that utilizes the generate_noise function above.

    @param model
        The model to whose gradients the clipping and noise are applied.
    @param batch_size
        Batch size used for averaging.
    @param max_norm
        The maximum norm of the per-sample gradients. Any gradient with norm
        higher than this will be clipped to this value.
    @param noise_multiplier
        The ratio of the standard deviation of the Gaussian noise to the
        L2-sensitivity of the function to which the noise is added.
    @param noise_type
        Sets the distribution for the noise generation.
        See generate_noise for supported strings.
    @param device
        The device used for calculations and needed for tensor definition.
    @param loss_reduction
        The method of loss reduction. Currently supported: "mean".
    """
    clipper = setup_clipper(model, max_norm, False, loss_reduction)
    clipper.zero_grad()
    # Clip the per-sample gradients, aggregate them into p.grad, then fetch
    # the per-parameter clip thresholds and the batch size the clipper saw.
    clipper.clip_and_accumulate()
    clip_values, batch_size = clipper.pre_step()
    params = (p for p in model.parameters() if p.requires_grad)
    for p, clip_value in zip(params, clip_values):
        noise = generate_noise(clip_value, p, noise_multiplier, noise_type, device)
        if loss_reduction == "mean":
            noise /= batch_size
        p.grad += noise
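
# Example usage (illustrative sketch of the intended call order; model,
# criterion and optimizer are placeholders, and the exact hook/ordering
# requirements depend on the installed opacus version):
#
#     loss = criterion(model(x), y)
#     loss.backward()
#     apply_noise_clipping(model, batch_size=x.shape[0], max_norm=1.0,
#                          noise_multiplier=1.1, noise_type="gaussian",
#                          device=x.device)
#     optimizer.step()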

# Client-side noise
class PrivacyEngineXL(opacus.PrivacyEngine):
    """
    A privacy engine that can utilize different distributions for noise
    generation, based on opacus' privacy engine. It gets attached to the
    optimizer just like the privacy engine from opacus.

    @param module
        The PyTorch module to which we are attaching the privacy engine
    @param batch_size
        Training batch size. Used in the privacy accountant.
    @param sample_size
        The size of the sample (dataset). Used in the privacy accountant.
    @param alphas
        A list of RDP orders.
    @param noise_multiplier
        The ratio of the standard deviation of the Gaussian noise to the
        L2-sensitivity of the function to which the noise is added.
    @param max_grad_norm
        The maximum norm of the per-sample gradients. Any gradient with norm
        higher than this will be clipped to this value.
    @param secure_rng
        If on, it will use ``torchcsprng`` for secure random number
        generation. This comes with a significant performance cost, so it is
        recommended to turn it off when just experimenting.
    @param grad_norm_type
        The order of the norm. For instance, 2 represents the L2 norm, while
        1 represents the L1 norm.
    @param batch_first
        Flag to indicate if the input tensor to the corresponding module
        has the first dimension representing the batch. If set to True,
        dimensions on the input tensor will be ``[batch_size, ..., ...]``.
    @param target_delta
        The target delta.
    @param loss_reduction
        Indicates if the loss reduction (for aggregating the gradients)
        is a sum or a mean operation. Can take values "sum" or "mean".
    @param noise_type
        Sets the distribution for the noise generation.
        See generate_noise for supported strings.
    @param **misc_settings
        Other arguments passed on to opacus' PrivacyEngine.
    """

    def __init__(
        self,
        module: torch.nn.Module,
        batch_size: int,
        sample_size: int,
        alphas: List[float],
        noise_multiplier: float,
        max_grad_norm: Union[float, List[float]],
        secure_rng: bool = False,
        grad_norm_type: int = 2,
        batch_first: bool = True,
        target_delta: float = 1e-6,
        loss_reduction: str = "mean",
        noise_type: str = "gaussian",
        **misc_settings
    ):
        if secure_rng:
            warnings.warn(
                "Secure RNG was turned on. However it is not yet implemented "
                "for the noise distributions of privacy_engine_xl."
            )
        opacus.PrivacyEngine.__init__(
            self,
            module,
            batch_size,
            sample_size,
            alphas,
            noise_multiplier,
            max_grad_norm,
            secure_rng,
            grad_norm_type,
            batch_first,
            target_delta,
            loss_reduction,
            **misc_settings)
        self.noise_type = noise_type

    def _generate_noise(self, max_norm, parameter):
        """
        Generates a tensor of noise in the same shape as ``parameter``,
        overriding the Gaussian-only noise generation of the parent engine.

        @param max_norm
            The maximum norm of the per-sample gradients. Any gradient with
            norm higher than this will be clipped to this value.
        @param parameter
            The parameter based on whose shape the noise tensor is created.
        @return
            A tensor of noise in the same shape as ``parameter``.
        """
        return generate_noise(max_norm, parameter, self.noise_multiplier,
                              self.noise_type, self.device)
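

if __name__ == "__main__":
    # Minimal smoke-test sketch: attach PrivacyEngineXL to an optimizer the
    # same way as the stock opacus engine. The model, hyperparameters and
    # RDP orders below are illustrative placeholders.
    model = torch.nn.Linear(8, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    engine = PrivacyEngineXL(
        model,
        batch_size=16,
        sample_size=1000,
        alphas=[1 + x / 10.0 for x in range(1, 100)],
        noise_multiplier=1.1,
        max_grad_norm=1.0,
        noise_type="laplace",
    )
    engine.attach(optimizer)
    print("PrivacyEngineXL attached with noise_type:", engine.noise_type)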