MapInputCSVToIDs.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. # needed because of machine inprecision. E.g A time difference of 0.1s is stored as >0.1s
  2. EPS_TOLERANCE = 1e-13 # works for a difference of 0.1, no less
  3. def find_interval_with_most_comm(packets, number_ids: int, max_int_time: float):
  4. """
  5. Finds a time interval of the given seconds where the given number of ids communicate among themselves the most.
  6. :param packets: The packets containing the communication
  7. :param number_ids: The number of ids that are to be considered
  8. :param max_int_time: A short description of the attack.
  9. :return: A triple consisting of the ids, as well as start and end idx with respect to the given packets.
  10. """
  11. def get_nez_msg_counts(msg_counts: dict):
  12. """
  13. Filters out all msg_counts that have 0 as value
  14. """
  15. nez_msg_counts = dict()
  16. for msg in msg_counts.keys():
  17. count = msg_counts[msg]
  18. if count > 0:
  19. nez_msg_counts[msg] = count
  20. return nez_msg_counts
  21. def greater_than(a: float, b: float):
  22. """
  23. A greater than operator desgined to handle slight machine inprecision up to EPS_TOLERANCE.
  24. :return: True if a > b, otherwise False
  25. """
  26. return b - a < -EPS_TOLERANCE
  27. def change_msg_counts(msg_counts: dict, idx: int, add=True):
  28. """
  29. Changes the value of the message count of the message occuring in the packet specified by the given index.
  30. Adds 1 if add is True and subtracts 1 otherwise.
  31. """
  32. change = 1 if add else -1
  33. id_src, id_dst = packets[idx]["Src"], packets[idx]["Dst"]
  34. src_to_dst = "{0}-{1}".format(id_src, id_dst)
  35. dst_to_src = "{0}-{1}".format(id_dst, id_src)
  36. if src_to_dst in msg_counts.keys():
  37. msg_counts[src_to_dst] += change
  38. elif dst_to_src in msg_counts.keys():
  39. msg_counts[dst_to_src] += change
  40. elif add:
  41. msg_counts[src_to_dst] = 1
  42. def count_ids_in_msg_counts(msg_counts: dict):
  43. """
  44. Counts all ids that are involved in messages with a non zero message count
  45. """
  46. ids = set()
  47. for msg in msg_counts.keys():
  48. src, dst = msg.split("-")
  49. ids.add(dst)
  50. ids.add(src)
  51. return len(ids)
  52. def get_msg_count_first_ids(msg_counts: list):
  53. """
  54. Finds the ids that communicate among themselves the most with respect to the given message counts.
  55. :param msg_counts: a sorted list of message counts where each entry is a tuple of key and value
  56. :return: The picked ids and their total message count as a tuple
  57. """
  58. # if order of most messages is important, use an additional list
  59. picked_ids = set()
  60. total_msg_count = 0
  61. # iterate over every message count
  62. for i, msg in enumerate(msg_counts):
  63. count_picked_ids = len(picked_ids)
  64. id_one, id_two = msg[0].split("-")
  65. # if enough ids have been found, stop
  66. if count_picked_ids >= number_ids:
  67. break
  68. # if two ids can be added without exceeding the desired number of ids, add them
  69. if count_picked_ids - 2 <= number_ids:
  70. picked_ids.add(id_one)
  71. picked_ids.add(id_two)
  72. total_msg_count += msg[1]
  73. # if there is only room for one more id to be added,
  74. # find one that is already contained in the picked ids
  75. else:
  76. for j, msg in enumerate(msg_counts[i:]):
  77. id_one, id_two = msg[0].split("-")
  78. if id_one in picked_ids:
  79. picked_ids.add(id_two)
  80. total_msg_count += msg[1]
  81. break
  82. elif id_two in picked_ids:
  83. picked_ids.add(id_one)
  84. total_msg_count += msg[1]
  85. break
  86. break
  87. return picked_ids, total_msg_count
  88. # first find all possible intervals that contain enough ids that communicate among themselves
  89. idx_low, idx_high = 0, 0
  90. msg_counts = dict()
  91. possible_intervals = []
  92. # Iterate over all packets from start to finish and process the info of each packet
  93. # If time of packet within time interval, update the message count for this communication
  94. # If time of packet exceeds time interval, substract from the message count for this communication
  95. while True:
  96. if idx_high < len(packets):
  97. cur_int_time = float(packets[idx_high]["Time"]) - float(packets[idx_low]["Time"])
  98. # if current interval time exceeds time interval, save the message counts if appropriate, or stop if no more packets
  99. if greater_than(cur_int_time, max_int_time) or idx_high >= len(packets):
  100. # get all message counts for communications that took place in the current intervall
  101. nez_msg_counts = get_nez_msg_counts(msg_counts)
  102. # if we have enough ids as specified by the caller, mark as possible interval
  103. if count_ids_in_msg_counts(nez_msg_counts) >= number_ids:
  104. #possible_intervals.append((nez_msg_counts, packets[idx_low]["Time"], packets[idx_high-1]["Time"]))
  105. possible_intervals.append((nez_msg_counts, idx_low, idx_high - 1))
  106. if idx_high >= len(packets):
  107. break
  108. # let idx_low "catch up" so that the current interval time fits into the interval time specified by the caller
  109. while greater_than(cur_int_time, max_int_time):
  110. change_msg_counts(msg_counts, idx_low, add=False)
  111. idx_low += 1
  112. cur_int_time = float(packets[idx_high]["Time"]) - float(packets[idx_low]["Time"])
  113. # consume the new packet at idx_high and process its information
  114. change_msg_counts(msg_counts, idx_high)
  115. idx_high += 1
  116. # now find the interval in which as many ids as specified communicate the most in the given time interval
  117. summed_intervals = []
  118. cur_highest_sum = 0
  119. # for every interval compute the sum of msg_counts of the first most communicative ids and eventually find
  120. # the interval(s) with most communication and its ids
  121. for interval in possible_intervals:
  122. msg_counts = interval[0].items()
  123. sorted_msg_counts = sorted(msg_counts, key=lambda x: x[1], reverse=True)
  124. picked_ids, msg_sum = get_msg_count_first_ids(sorted_msg_counts)
  125. if msg_sum == cur_highest_sum:
  126. summed_intervals.append({"IDs": picked_ids, "MsgSum": msg_sum, "Start": interval[1], "End": interval[2]})
  127. elif msg_sum > cur_highest_sum:
  128. summed_intervals = []
  129. summed_intervals.append({"IDs": picked_ids, "MsgSum": msg_sum, "Start": interval[1], "End": interval[2]})
  130. cur_highest_sum = msg_sum
  131. return summed_intervals