ソースを参照

Add initial concept to map data contained in CSV to IDs with most communication in an interval

dustin.born 7 年 前
コミット
dfb21da65c
1 ファイル変更158 行追加0 行削除
  1. 158 0
      code/ID2TLib/MapInputCSVToIDs.py

+ 158 - 0
code/ID2TLib/MapInputCSVToIDs.py

@@ -0,0 +1,158 @@
+
# Needed because of machine imprecision, e.g. a time difference of 0.1s may be
# stored as slightly more than 0.1s.
EPS_TOLERANCE = 1e-13  # works for a difference of 0.1, no less


def _greater_than(a: float, b: float) -> bool:
    """
    A greater-than operator designed to handle slight machine imprecision up to EPS_TOLERANCE.

    :return: True if a exceeds b by more than EPS_TOLERANCE, otherwise False
    """
    return a - b > EPS_TOLERANCE


def _pick_most_communicative_ids(sorted_msg_counts: list, number_ids: int):
    """
    Greedily picks at most number_ids ids, preferring the id pairs with the highest message counts.

    A pair is only accepted if adding its not-yet-picked ids does not push the number of picked ids
    above number_ids. Pairs that do not fit are skipped, so a later pair that shares an id with the
    already picked ones can still fill the last free slot.

    :param sorted_msg_counts: list of ((id_one, id_two), count) tuples, sorted by count in descending order
    :param number_ids: the maximum number of ids to pick
    :return: The picked ids (a set, possibly smaller than number_ids) and the summed message count of
             all accepted pairs, as a tuple
    """
    picked_ids = set()
    total_msg_count = 0

    for (id_one, id_two), count in sorted_msg_counts:
        # if enough ids have been found, stop
        if len(picked_ids) >= number_ids:
            break

        # only the ids that are not picked yet consume free slots
        new_ids = {id_one, id_two} - picked_ids
        if len(picked_ids) + len(new_ids) <= number_ids:
            picked_ids |= new_ids
            total_msg_count += count

    return picked_ids, total_msg_count


def find_interval_with_most_comm(packets, number_ids: int, max_int_time: float):
    """
    Finds the time interval(s) of at most the given length in which the given number of ids
    communicate among themselves the most.

    The packets are scanned with a sliding window. Every time the window would exceed max_int_time
    (and once more at the end of the packets), the current window is scored if at least number_ids
    distinct ids communicate in it.

    :param packets: An indexable sequence of packets. Each packet must provide the keys "Src", "Dst"
                    and "Time". "Time" must be convertible to float; the packets are presumably
                    ordered by ascending time (not verified here).
    :param number_ids: The number of ids that are to be considered
    :param max_int_time: The maximum length of the interval, in the unit of the "Time" values
    :return: A list of dicts with the keys "IDs" (set of picked ids as strings), "MsgSum" (their summed
             message count), "Start" and "End" (first and last packet index of the interval). The list
             holds every interval that reaches the highest message sum and is empty if no interval
             contains enough ids or if there are no packets.
    """
    packet_count = len(packets)
    if packet_count == 0:
        return []

    # Maps an unordered id pair, stored as (id, id) tuple in first-seen orientation, to the number
    # of messages exchanged in the current window. Tuples are used instead of joined strings so that
    # ids containing a separator character (e.g. "-") are handled correctly.
    msg_counts = dict()
    best_intervals = []
    highest_sum = 0

    def time_at(idx: int) -> float:
        return float(packets[idx]["Time"])

    def change_msg_count(idx: int, change: int):
        """
        Adds change to the message count of the id pair communicating in the packet at idx.
        """
        id_src, id_dst = str(packets[idx]["Src"]), str(packets[idx]["Dst"])
        if (id_src, id_dst) in msg_counts:
            msg_counts[(id_src, id_dst)] += change
        elif (id_dst, id_src) in msg_counts:
            msg_counts[(id_dst, id_src)] += change
        elif change > 0:
            msg_counts[(id_src, id_dst)] = change

    def score_interval(idx_start: int, idx_end: int):
        """
        Scores the current window and records it if it is among the best seen so far.
        """
        nonlocal best_intervals, highest_sum

        # only pairs that actually communicate in the current window are relevant
        active_counts = [(pair, count) for pair, count in msg_counts.items() if count > 0]

        involved_ids = set()
        for pair, _ in active_counts:
            involved_ids.update(pair)
        if len(involved_ids) < number_ids:
            return

        active_counts.sort(key=lambda entry: entry[1], reverse=True)
        picked_ids, msg_sum = _pick_most_communicative_ids(active_counts, number_ids)

        if msg_sum < highest_sum:
            return
        if msg_sum > highest_sum:
            best_intervals = []
            highest_sum = msg_sum
        best_intervals.append({"IDs": picked_ids, "MsgSum": msg_sum, "Start": idx_start, "End": idx_end})

    idx_low = 0
    for idx_high in range(packet_count):
        # if the new packet does not fit into the interval any more, score the window collected so far
        if _greater_than(time_at(idx_high) - time_at(idx_low), max_int_time):
            score_interval(idx_low, idx_high - 1)

            # let idx_low "catch up" so that the window fits into the interval time again
            while idx_low < idx_high and _greater_than(time_at(idx_high) - time_at(idx_low), max_int_time):
                change_msg_count(idx_low, -1)
                idx_low += 1

        # consume the new packet at idx_high
        change_msg_count(idx_high, 1)

    # the window that is still open when the packets run out is a candidate as well
    score_interval(idx_low, packet_count - 1)

    return best_intervals