# group.py
  1. import logging
  2. import math
  3. logger = logging.getLogger("pra_framework")
  4. class Group(object):
  5. """
  6. A Group represents a group of IP addresses regarding the PRA.
  7. It's generally connected to a marker value for identification purposes.
  8. A Group contains single addreses (group_type == Group.GROUP_TYPE_SINGLE_ADDRESSES)
  9. OR address ranges (group_type == Group.GROUP_TYPE_CIDR)
  10. """
  11. GROUP_TYPE_SINGLE_ADDRESSES = 0
  12. GROUP_TYPE_CIDR = 1
  13. def __init__(self,
  14. ip_network_object=None,
  15. ip_host_objects=None,
  16. response_count=0):
  17. """
  18. Given Parameters: The IPs this group is made of.
  19. ip_network_bytes -- bytes like b"1234" to create this group
  20. ip_host_objects -- IPs to be used to create this group
  21. """
  22. self.ip_network = None
  23. self.ip_hosts = None
  24. if ip_network_object is not None:
  25. self.ip_network = ip_network_object
  26. else:
  27. self.ip_hosts = ip_host_objects
  28. # "+1"-groups won't get scanned, the amount of responses is derived implicitly
  29. self.is_plus1 = False
  30. # store "+1"-Subgroup for faster access (marker bytes OR group itself)
  31. self.plus1_subgroup = None
  32. # logger.debug("group network/addresses: %r/%r" % (self.ip_network, self.ip_hosts))
  33. # full marker as bytes
  34. self.marker_bytes = None
  35. self.marker_value_int = None
  36. # how many times the group is counted in the report
  37. self.response_count = response_count
  38. # needed to create additional blacklists for other groups which do not need
  39. # to scan this addresses again (already placed in separate group)
  40. self.top_group = None
  41. self.subgroups = set()
  42. # indicates that this subgroup has response count unequal
  43. # to the sum of responses of all its subgroups
  44. # positive value: top group has more responses than sum of all subgroups, negative value: less ...)
  45. # TODO: activate if needed (deactivated to save memory)
  46. #self.response_discrepancy = 0
  47. def _get_grouptype(self):
  48. return Group.GROUP_TYPE_CIDR if self.ip_network is not None else Group.GROUP_TYPE_SINGLE_ADDRESSES
  49. group_type = property(_get_grouptype)
  50. def _get_amount_addresses(self):
  51. return self.ip_network.num_addresses if self.group_type == Group.GROUP_TYPE_CIDR else len(self.ip_hosts)
  52. amount_addresses = property(_get_amount_addresses)
  53. def add_subgroup(self, group):
  54. group.top_group = self
  55. #group.top_group_markervalue_bytes = self.markervalue_bytes
  56. self.subgroups.add(group)
  57. def create_subgroups(self, amount_subgroups, ipv4_addresses=[], use_plus1=False):
  58. """
  59. Create amount subgroups based on amount_subgroups and response counts.
  60. amount_subgroups -- amount of subgroups to create, this includes the +1 group
  61. e.g. amount_subgroups = 4 = subgroups + "+1"
  62. ipv4_addresses -- list of single IP addresses as IP objects to be added as dedicated group.
  63. use_plus1 -- define last subgroup as +1 group
  64. return -- created subgroups as list
  65. """
  66. subgroups = []
  67. if self.response_count >= self.amount_addresses:
  68. if self.response_count > self.amount_addresses:
  69. logger.warning("got more responses than amount addresses, too much noise?"
  70. " responses/# addresses = %d/%d" % (self.response_count, self.amount_addresses))
  71. return subgroups
  72. # check if this is a single address group (CIDR prefix is 32 or just 1 single address)
  73. if self.amount_addresses <= 1:
  74. return subgroups
  75. if self.group_type == Group.GROUP_TYPE_SINGLE_ADDRESSES:
  76. single_addr_amount = len(self.ip_hosts)
  77. """
  78. logger.debug("subgrouping single addresses: addresses/target subgroups = %d/%d" %
  79. (single_addr_amount, amount_subgroups))
  80. """
  81. if single_addr_amount == 0:
  82. logger.warning("no host for subgrouping!")
  83. # split by amount of adresses
  84. groupsize_single = math.floor(single_addr_amount / amount_subgroups)
  85. # at minimum 1 group
  86. groupsize_single = max(1, groupsize_single)
  87. for i in range(0, single_addr_amount, groupsize_single):
  88. # not enough room for more groups, add all remaining addresses
  89. if len(subgroups) + 1 >= amount_subgroups:
  90. subgroup = Group(ip_host_objects=self.ip_hosts[i:])
  91. subgroups.append(subgroup)
  92. break
  93. else:
  94. sub_ips = self.ip_hosts[i: i + groupsize_single]
  95. # this should not happen
  96. if len(sub_ips) != 0:
  97. subgroup = Group(ip_host_objects=sub_ips)
  98. subgroups.append(subgroup)
  99. else:
  100. break
  101. elif self.group_type == Group.GROUP_TYPE_CIDR:
  102. subgroups_single_addresses = 0
  103. #logger.debug("picking IPv4 addresses which belong to this groups (searching through: %d)" % len(ip_host_objects))
  104. single_addresses_for_regrouping = ipv4_addresses
  105. # amount of single addresses to be re-groupted
  106. single_addr_amount = len(single_addresses_for_regrouping)
  107. if single_addr_amount > 0:
  108. if len(single_addresses_for_regrouping) > 0:
  109. #logger.debug("re-grouping %d single addresses for nw %r" % (single_addr_amount, self.ip_network))
  110. #for single_addr in single_addresses_for_regrouping:
  111. # logger.debug("%r" % single_addr)
  112. # calculate amount of subgroups for single addresses:
  113. # (amount of single addreses / total group address space) * amount of groups
  114. subgroups_single_addresses = math.floor((single_addr_amount / self.ip_network.num_addresses) *
  115. amount_subgroups)
  116. # at minimum 1 group
  117. subgroups_single_addresses = max(1, subgroups_single_addresses)
  118. # not more groups than single addresses
  119. subgroups_single_addresses = min(single_addr_amount, subgroups_single_addresses)
  120. groupsize_single = math.floor(single_addr_amount / subgroups_single_addresses)
  121. """
  122. logger.debug("creating single addresses groups,"
  123. "addr total=%d, groups total=%d, group size=: %d/%d/%d" %
  124. (single_addr_amount,
  125. subgroups_single_addresses,
  126. groupsize_single))
  127. """
  128. for i in range(0, single_addr_amount, groupsize_single):
  129. if len(subgroups) + 1 >= subgroups_single_addresses:
  130. group = Group(ip_host_objects=single_addresses_for_regrouping[i:])
  131. #logger.debug("adding single addresses group: %r" % group)
  132. subgroups.append(group)
  133. break
  134. else:
  135. addresses = single_addresses_for_regrouping[i: i + groupsize_single]
  136. if len(addresses) != 0:
  137. group = Group(ip_host_objects=addresses)
  138. #logger.debug("adding single addresses group: %r" % group)
  139. subgroups.append(group)
  140. else:
  141. # no more groups to split up
  142. break
  143. # round to next lower integer with 2**x <= (amount of groups for cidr)
  144. # Example: 16/5 (ip/groups) = 5 -> 2**_log_2(5)_ = 4 top groups [4,4,4,4]
  145. # (which are splittet later on) -> [4,4,4,2,2]
  146. cidr_bits_plus = math.floor(math.log(
  147. amount_subgroups - subgroups_single_addresses,
  148. 2))
  149. # not more CIDR bits than available
  150. cidr_bits_plus = min(32 - self.ip_network.prefixlen, cidr_bits_plus)
  151. """
  152. logger.debug("current prefix=%d, CIDR bits plus=%d, amount subgroups=%d, single addresses=%d" %
  153. (self.ip_network.prefixlen, cidr_bits_plus, amount_subgroups, subgroups_single_addresses))
  154. """
  155. # create subnets: e.g. 1.2.3.0/24 -> CIDR+1 -> 1.2.3.0/25, 1.2.3.128/25
  156. cidr_nw = self.ip_network.subnets(prefixlen_diff=cidr_bits_plus)
  157. cidr_nw_len_at_start = len(cidr_nw)
  158. # amount of times we reached CIDR /32
  159. splitfail_cnt = 0
  160. # logger.debug("re-splitting groups for CIDR, initial groupsize/target amount of groups/CIDR bits +x: %d/%d/%d" %
  161. # (cidr_nw_len_at_start, amount_groups_for_cidr, cidr_bits_plus))
  162. subgroup_len = len(subgroups)
  163. while len(cidr_nw) + subgroup_len < amount_subgroups:
  164. # split subgroups until we have enough of them
  165. # [A,B,C,D] -> split A by 1 bit -> [B,C,D,a,a] -> split B by 1 bit -> [C,D,a,a,b,b] ...
  166. group_to_split = cidr_nw[0]
  167. del cidr_nw[0]
  168. if group_to_split.prefixlen == 32:
  169. # nothing to split: re-append to the end
  170. cidr_nw.append(group_to_split)
  171. splitfail_cnt += 1
  172. #logger.debug("can't create subnets anymore: /32 reached for %r" % group_to_split)
  173. if splitfail_cnt > len(cidr_nw):
  174. # logger.warning("too many split fails: single addresses reached?")
  175. break
  176. else:
  177. subnets = group_to_split.subnets(prefixlen_diff=1)
  178. if subgroup_len + len(cidr_nw) + len(subnets) > amount_subgroups:
  179. logger.debug("!!! stopping CIDR subgrouping: split would increase max number")
  180. cidr_nw.append(group_to_split)
  181. break
  182. #logger.debug("new subgroups: %d" % len(subnets))
  183. # append subnet to the end and try next
  184. cidr_nw.extend(subnets)
  185. if cidr_nw_len_at_start == len(cidr_nw):
  186. # logger.debug("no CIDR groups have been re-splitted (perhaps there were enough (2**x)): %d" %
  187. # cidr_nw_len_at_start)
  188. pass
  189. # then add CIDR based groups
  190. for nw in cidr_nw:
  191. #logger.debug("g5: adding sub: %r" % nw)
  192. group = Group(ip_network_object=nw)
  193. subgroups.append(group)
  194. if use_plus1 and len(subgroups) > 1:
  195. subgroups[-1].is_plus1 = True
  196. """
  197. if len(subgroups) < 600:
  198. for g in subgroups:
  199. logger.debug("%r" % g)
  200. """
  201. # state should now be: [single address groups]+[CIDR groups]
  202. return subgroups
  203. def _addr_get(self):
  204. addr = []
  205. if self.ip_network is not None:
  206. addr.append(self.ip_network.compressed)
  207. else:
  208. addr.extend([host.compressed for host in self.ip_hosts])
  209. return addr
  210. # return all addresses as list of strings (mixed CIDR and single notation)
  211. addresses = property(_addr_get)
  212. def _addr_get_single_bytes(self):
  213. if self.group_type == Group.GROUP_TYPE_CIDR:
  214. return self.ip_network.hosts
  215. else:
  216. return [host.packed for host in self.ip_hosts]
  217. # return all addresses als list of bytes (only single non-CIDR notation)
  218. addresses_single_bytes = property(_addr_get_single_bytes)
  219. def __repr__(self):
  220. if self.group_type == Group.GROUP_TYPE_CIDR:
  221. # "1.2.3.4/x"
  222. return self.ip_network.compressed
  223. else:
  224. # "1.2.3.4 1.2.3.5 ..."
  225. return " ".join([host.compressed for host in self.ip_hosts])