group.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. import logging
  2. import math
  3. logger = logging.getLogger("pra_framework")
  4. class Group(object):
  5. """
  6. A Group represents a group of IP addresses regarding the PRA.
  7. It's generally connected to a marker value for identification purposes.
  8. A Group contains single addreses (group_type == Group.GROUP_TYPE_SINGLE_ADDRESSES)
  9. OR address ranges (group_type == Group.GROUP_TYPE_CIDR)
  10. """
  11. GROUP_TYPE_SINGLE_ADDRESSES = 0
  12. GROUP_TYPE_CIDR = 1
  13. def __init__(self,
  14. ip_network_object=None,
  15. ip_host_objects=None,
  16. response_count=0):
  17. """
  18. Given Parameters: The IPs this group is made of.
  19. ip_network_bytes -- bytes like b"1234" to create this group
  20. ip_host_objects -- IPs to be used to create this group
  21. """
  22. self.ip_network = None
  23. self.ip_hosts = None
  24. if ip_network_object is not None:
  25. self.ip_network = ip_network_object
  26. else:
  27. self.ip_hosts = ip_host_objects
  28. # "+1"-groups won't get scanned, the amount of responses is derived implicitly
  29. self.is_plus1 = False
  30. # store "+1"-Subgroup for faster access (marker bytes OR group itself)
  31. self.plus1_subgroup = None
  32. # logger.debug("group network/addresses: %r/%r" % (self.ip_network, self.ip_hosts))
  33. # full marker as bytes
  34. self.marker_bytes = None
  35. self.marker_value_int = None
  36. # how many times the group is counted in the report
  37. self.response_count = response_count
  38. # needed to create additional blacklists for other groups which do not need
  39. # to scan this addresses again (already placed in separate group)
  40. self.top_group = None
  41. self.subgroups = set()
  42. def _get_grouptype(self):
  43. return Group.GROUP_TYPE_CIDR if self.ip_network is not None else Group.GROUP_TYPE_SINGLE_ADDRESSES
  44. group_type = property(_get_grouptype)
  45. def _get_amount_addresses(self):
  46. return self.ip_network.num_addresses if self.group_type == Group.GROUP_TYPE_CIDR else len(self.ip_hosts)
  47. amount_addresses = property(_get_amount_addresses)
  48. def add_subgroup(self, group):
  49. group.top_group = self
  50. #group.top_group_markervalue_bytes = self.markervalue_bytes
  51. self.subgroups.add(group)
  52. def create_subgroups(self, amount_subgroups, ipv4_addresses=[], use_plus1=False):
  53. """
  54. Create amount subgroups based on amount_subgroups and response counts.
  55. amount_subgroups -- amount of subgroups to create, this includes the +1 group
  56. e.g. amount_subgroups = 4 = subgroups + "+1"
  57. ipv4_addresses -- list of single IP addresses as IP objects to be added as dedicated group.
  58. use_plus1 -- define last subgroup as +1 group
  59. return -- created subgroups as list
  60. """
  61. subgroups = []
  62. if self.response_count >= self.amount_addresses:
  63. if self.response_count > self.amount_addresses:
  64. logger.warning("got more responses than amount addresses, too much noise?"
  65. " responses/# addresses = %d/%d" % (self.response_count, self.amount_addresses))
  66. return subgroups
  67. # check if this is a single address group (CIDR prefix is 32 or just 1 single address)
  68. if self.amount_addresses <= 1:
  69. return subgroups
  70. if self.group_type == Group.GROUP_TYPE_SINGLE_ADDRESSES:
  71. single_addr_amount = len(self.ip_hosts)
  72. """
  73. logger.debug("subgrouping single addresses: addresses/target subgroups = %d/%d" %
  74. (single_addr_amount, amount_subgroups))
  75. """
  76. if single_addr_amount == 0:
  77. logger.warning("no host for subgrouping!")
  78. # split by amount of adresses
  79. groupsize_single = math.floor(single_addr_amount / amount_subgroups)
  80. # at minimum 1 group
  81. groupsize_single = max(1, groupsize_single)
  82. for i in range(0, single_addr_amount, groupsize_single):
  83. # not enough room for more groups, add all remaining addresses
  84. if len(subgroups) + 1 >= amount_subgroups:
  85. subgroup = Group(ip_host_objects=self.ip_hosts[i:])
  86. subgroups.append(subgroup)
  87. break
  88. else:
  89. sub_ips = self.ip_hosts[i: i + groupsize_single]
  90. # this should not happen
  91. if len(sub_ips) != 0:
  92. subgroup = Group(ip_host_objects=sub_ips)
  93. subgroups.append(subgroup)
  94. else:
  95. break
  96. elif self.group_type == Group.GROUP_TYPE_CIDR:
  97. subgroups_single_addresses = 0
  98. #logger.debug("picking IPv4 addresses which belong to this groups (searching through: %d)" % len(ip_host_objects))
  99. single_addresses_for_regrouping = ipv4_addresses
  100. # amount of single addresses to be re-groupted
  101. single_addr_amount = len(single_addresses_for_regrouping)
  102. if single_addr_amount > 0:
  103. if len(single_addresses_for_regrouping) > 0:
  104. #logger.debug("re-grouping %d single addresses for nw %r" % (single_addr_amount, self.ip_network))
  105. #for single_addr in single_addresses_for_regrouping:
  106. # logger.debug("%r" % single_addr)
  107. # calculate amount of subgroups for single addresses:
  108. # (amount of single addreses / total group address space) * amount of groups
  109. subgroups_single_addresses = math.floor((single_addr_amount / self.ip_network.num_addresses) *
  110. amount_subgroups)
  111. # at minimum 1 group
  112. subgroups_single_addresses = max(1, subgroups_single_addresses)
  113. # not more groups than single addresses
  114. subgroups_single_addresses = min(single_addr_amount, subgroups_single_addresses)
  115. groupsize_single = math.floor(single_addr_amount / subgroups_single_addresses)
  116. """
  117. logger.debug("creating single addresses groups,"
  118. "addr total=%d, groups total=%d, group size=: %d/%d/%d" %
  119. (single_addr_amount,
  120. subgroups_single_addresses,
  121. groupsize_single))
  122. """
  123. for i in range(0, single_addr_amount, groupsize_single):
  124. if len(subgroups) + 1 >= subgroups_single_addresses:
  125. group = Group(ip_host_objects=single_addresses_for_regrouping[i:])
  126. #logger.debug("adding single addresses group: %r" % group)
  127. subgroups.append(group)
  128. break
  129. else:
  130. addresses = single_addresses_for_regrouping[i: i + groupsize_single]
  131. if len(addresses) != 0:
  132. group = Group(ip_host_objects=addresses)
  133. #logger.debug("adding single addresses group: %r" % group)
  134. subgroups.append(group)
  135. else:
  136. # no more groups to split up
  137. break
  138. # round to next lower integer with 2**x <= (amount of groups for cidr)
  139. # Example: 16/5 (ip/groups) = 5 -> 2**_log_2(5)_ = 4 top groups [4,4,4,4]
  140. # (which are splittet later on) -> [4,4,4,2,2]
  141. cidr_bits_plus = math.floor(math.log(
  142. amount_subgroups - subgroups_single_addresses,
  143. 2))
  144. # not more CIDR bits than available
  145. cidr_bits_plus = min(32 - self.ip_network.prefixlen, cidr_bits_plus)
  146. """
  147. logger.debug("current prefix=%d, CIDR bits plus=%d, amount subgroups=%d, single addresses=%d" %
  148. (self.ip_network.prefixlen, cidr_bits_plus, amount_subgroups, subgroups_single_addresses))
  149. """
  150. # create subnets: e.g. 1.2.3.0/24 -> CIDR+1 -> 1.2.3.0/25, 1.2.3.128/25
  151. cidr_nw = self.ip_network.subnets(prefixlen_diff=cidr_bits_plus)
  152. cidr_nw_len_at_start = len(cidr_nw)
  153. # amount of times we reached CIDR /32
  154. splitfail_cnt = 0
  155. # logger.debug("re-splitting groups for CIDR, initial groupsize/target amount of groups/CIDR bits +x: %d/%d/%d" %
  156. # (cidr_nw_len_at_start, amount_groups_for_cidr, cidr_bits_plus))
  157. subgroup_len = len(subgroups)
  158. while len(cidr_nw) + subgroup_len < amount_subgroups:
  159. # split subgroups until we have enough of them
  160. # [A,B,C,D] -> split A by 1 bit -> [B,C,D,a,a] -> split B by 1 bit -> [C,D,a,a,b,b] ...
  161. group_to_split = cidr_nw[0]
  162. del cidr_nw[0]
  163. if group_to_split.prefixlen == 32:
  164. # nothing to split: re-append to the end
  165. cidr_nw.append(group_to_split)
  166. splitfail_cnt += 1
  167. #logger.debug("can't create subnets anymore: /32 reached for %r" % group_to_split)
  168. if splitfail_cnt > len(cidr_nw):
  169. # logger.warning("too many split fails: single addresses reached?")
  170. break
  171. else:
  172. subnets = group_to_split.subnets(prefixlen_diff=1)
  173. if subgroup_len + len(cidr_nw) + len(subnets) > amount_subgroups:
  174. logger.debug("!!! stopping CIDR subgrouping: split would increase max number")
  175. cidr_nw.append(group_to_split)
  176. break
  177. #logger.debug("new subgroups: %d" % len(subnets))
  178. # append subnet to the end and try next
  179. cidr_nw.extend(subnets)
  180. if cidr_nw_len_at_start == len(cidr_nw):
  181. # logger.debug("no CIDR groups have been re-splitted (perhaps there were enough (2**x)): %d" %
  182. # cidr_nw_len_at_start)
  183. pass
  184. # then add CIDR based groups
  185. for nw in cidr_nw:
  186. #logger.debug("g5: adding sub: %r" % nw)
  187. group = Group(ip_network_object=nw)
  188. subgroups.append(group)
  189. if use_plus1 and len(subgroups) > 1:
  190. subgroups[-1].is_plus1 = True
  191. """
  192. if len(subgroups) < 600:
  193. for g in subgroups:
  194. logger.debug("%r" % g)
  195. """
  196. # state should now be: [single address groups]+[CIDR groups]
  197. return subgroups
  198. def _addr_get(self):
  199. addr = []
  200. if self.ip_network is not None:
  201. addr.append(self.ip_network.compressed)
  202. else:
  203. addr.extend([host.compressed for host in self.ip_hosts])
  204. return addr
  205. # return all addresses as list of strings (mixed CIDR and single notation)
  206. addresses = property(_addr_get)
  207. def _addr_get_single_bytes(self):
  208. if self.group_type == Group.GROUP_TYPE_CIDR:
  209. return self.ip_network.hosts
  210. else:
  211. return [host.packed for host in self.ip_hosts]
  212. # return all addresses als list of bytes (only single non-CIDR notation)
  213. addresses_single_bytes = property(_addr_get_single_bytes)
  214. def __repr__(self):
  215. if self.group_type == Group.GROUP_TYPE_CIDR:
  216. # "1.2.3.4/x"
  217. return self.ip_network.compressed
  218. else:
  219. # "1.2.3.4 1.2.3.5 ..."
  220. return " ".join([host.compressed for host in self.ip_hosts])