7 年前 · dfb21da65c
--- a/code/ID2TLib/MapInputCSVToIDs.py
+++ b/code/ID2TLib/MapInputCSVToIDs.py
@@ -0,0 +1,158 @@
 
				+
			
 
				+# needed because of machine inprecision. E.g A time difference of 0.1s is stored as >0.1s
			
 
				+EPS_TOLERANCE = 1e-13  # works for a difference of 0.1, no less
			
 
				+
			
 
				+def find_interval_with_most_comm(packets, number_ids: int, max_int_time: float):
			
 
				+    """
			
 
				+    Finds a time interval of the given seconds where the given number of ids communicate among themselves the most.
			
 
				+    
			
 
				+    :param packets: The packets containing the communication
			
 
				+    :param number_ids: The number of ids that are to be considered
			
 
				+    :param max_int_time: A short description of the attack.
			
 
				+    :return: A triple consisting of the ids, as well as start and end idx with respect to the given packets. 
			
 
				+    """
			
 
				+
			
 
				+    def get_nez_msg_counts(msg_counts: dict):
			
 
				+        """
			
 
				+        Filters out all msg_counts that have 0 as value
			
 
				+        """
			
 
				+        nez_msg_counts = dict()
			
 
				+        for msg in msg_counts.keys():
			
 
				+            count = msg_counts[msg]
			
 
				+            if count > 0:
			
 
				+                nez_msg_counts[msg] = count
			
 
				+        return nez_msg_counts
			
 
				+
			
 
				+    def greater_than(a: float, b: float):
			
 
				+        """
			
 
				+        A greater than operator desgined to handle slight machine inprecision up to EPS_TOLERANCE.
			
 
				+        :return: True if a > b, otherwise False
			
 
				+        """
			
 
				+        return b - a < -EPS_TOLERANCE
			
 
				+
			
 
				+    def change_msg_counts(msg_counts: dict, idx: int, add=True):
			
 
				+        """
			
 
				+        Changes the value of the message count of the message occuring in the packet specified by the given index. 
			
 
				+        Adds 1 if add is True and subtracts 1 otherwise.
			
 
				+        """
			
 
				+        change = 1 if add else -1
			
 
				+        id_src, id_dst = packets[idx]["Src"], packets[idx]["Dst"]
			
 
				+        src_to_dst = "{0}-{1}".format(id_src, id_dst)
			
 
				+        dst_to_src = "{0}-{1}".format(id_dst, id_src)
			
 
				+
			
 
				+        if src_to_dst in msg_counts.keys():
			
 
				+            msg_counts[src_to_dst] += change
			
 
				+        elif dst_to_src in msg_counts.keys():
			
 
				+            msg_counts[dst_to_src] += change
			
 
				+        elif add:
			
 
				+            msg_counts[src_to_dst] = 1
			
 
				+
			
 
				+    def count_ids_in_msg_counts(msg_counts: dict):
			
 
				+        """
			
 
				+        Counts all ids that are involved in messages with a non zero message count
			
 
				+        """
			
 
				+        ids = set()
			
 
				+        for msg in msg_counts.keys():
			
 
				+            src, dst = msg.split("-")
			
 
				+            ids.add(dst)
			
 
				+            ids.add(src)
			
 
				+        return len(ids)
			
 
				+
			
 
				+    def get_msg_count_first_ids(msg_counts: list):
			
 
				+        """
			
 
				+        Finds the ids that communicate among themselves the most with respect to the given message counts.
			
 
				+        :param msg_counts: a sorted list of message counts where each entry is a tuple of key and value
			
 
				+        :return: The picked ids and their total message count as a tuple
			
 
				+        """
			
 
				+        # if order of most messages is important, use an additional list
			
 
				+        picked_ids = set()
			
 
				+        total_msg_count = 0
			
 
				+
			
 
				+        # iterate over every message count
			
 
				+        for i, msg in enumerate(msg_counts):
			
 
				+            count_picked_ids = len(picked_ids)
			
 
				+            id_one, id_two = msg[0].split("-")
			
 
				+
			
 
				+            # if enough ids have been found, stop
			
 
				+            if count_picked_ids >= number_ids:
			
 
				+                break
			
 
				+
			
 
				+            # if two ids can be added without exceeding the desired number of ids, add them
			
 
				+            if count_picked_ids - 2 <= number_ids:
			
 
				+                picked_ids.add(id_one)
			
 
				+                picked_ids.add(id_two)
			
 
				+                total_msg_count += msg[1]
			
 
				+
			
 
				+            # if there is only room for one more id to be added, 
			
 
				+            # find one that is already contained in the picked ids
			
 
				+            else:
			
 
				+                for j, msg in enumerate(msg_counts[i:]):
			
 
				+                    id_one, id_two = msg[0].split("-")
			
 
				+                    if id_one in picked_ids:
			
 
				+                        picked_ids.add(id_two)
			
 
				+                        total_msg_count += msg[1]
			
 
				+                        break
			
 
				+                    elif id_two in picked_ids:
			
 
				+                        picked_ids.add(id_one)
			
 
				+                        total_msg_count += msg[1]
			
 
				+                        break
			
 
				+                break
			
 
				+
			
 
				+        return picked_ids, total_msg_count
			
 
				+
			
 
				+    # first find all possible intervals that contain enough ids that communicate among themselves
			
 
				+    idx_low, idx_high = 0, 0
			
 
				+    msg_counts = dict()
			
 
				+    possible_intervals = []
			
 
				+
			
 
				+    # Iterate over all packets from start to finish and process the info of each packet
			
 
				+    # If time of packet within time interval, update the message count for this communication
			
 
				+    # If time of packet exceeds time interval, substract from the message count for this communication
			
 
				+    while True:
			
 
				+        if idx_high < len(packets):
			
 
				+            cur_int_time = float(packets[idx_high]["Time"]) - float(packets[idx_low]["Time"])
			
 
				+ 
			
 
				+        # if current interval time exceeds time interval, save the message counts if appropriate, or stop if no more packets
			
 
				+        if greater_than(cur_int_time, max_int_time) or idx_high >= len(packets):
			
 
				+            # get all message counts for communications that took place in the current intervall
			
 
				+            nez_msg_counts = get_nez_msg_counts(msg_counts)
			
 
				+
			
 
				+            # if we have enough ids as specified by the caller, mark as possible interval
			
 
				+            if count_ids_in_msg_counts(nez_msg_counts) >= number_ids:
			
 
				+                #possible_intervals.append((nez_msg_counts, packets[idx_low]["Time"], packets[idx_high-1]["Time"]))
			
 
				+                possible_intervals.append((nez_msg_counts, idx_low, idx_high - 1))
			
 
				+
			
 
				+            if idx_high >= len(packets):
			
 
				+                break
			
 
				+
			
 
				+        # let idx_low "catch up" so that the current interval time fits into the interval time specified by the caller
			
 
				+        while greater_than(cur_int_time, max_int_time):
			
 
				+            change_msg_counts(msg_counts, idx_low, add=False)
			
 
				+            idx_low += 1
			
 
				+            cur_int_time = float(packets[idx_high]["Time"]) - float(packets[idx_low]["Time"])
			
 
				+
			
 
				+        # consume the new packet at idx_high and process its information
			
 
				+        change_msg_counts(msg_counts, idx_high)
			
 
				+        idx_high += 1
			
 
				+
			
 
				+
			
 
				+    # now find the interval in which as many ids as specified communicate the most in the given time interval
			
 
				+    summed_intervals = []
			
 
				+    cur_highest_sum = 0
			
 
				+    
			
 
				+    # for every interval compute the sum of msg_counts of the first most communicative ids and eventually find
			
 
				+    # the interval(s) with most communication and its ids
			
 
				+    for interval in possible_intervals:
			
 
				+        msg_counts = interval[0].items()
			
 
				+        sorted_msg_counts = sorted(msg_counts, key=lambda x: x[1], reverse=True)
			
 
				+
			
 
				+        picked_ids, msg_sum = get_msg_count_first_ids(sorted_msg_counts)
			
 
				+
			
 
				+        if msg_sum == cur_highest_sum:
			
 
				+            summed_intervals.append({"IDs": picked_ids, "MsgSum": msg_sum, "Start": interval[1], "End": interval[2]})
			
 
				+        elif msg_sum > cur_highest_sum:
			
 
				+            summed_intervals = []
			
 
				+            summed_intervals.append({"IDs": picked_ids, "MsgSum": msg_sum, "Start": interval[1], "End": interval[2]})
			
 
				+            cur_highest_sum = msg_sum
			
 
				+
			
 
				+    return summed_intervals