Pārlūkot izejas kodu

Further improve code documentation and structure.

dustin.born 7 gadi atpakaļ
vecāks
revīzija
77ce624d54
2 mainītis faili ar 332 papildinājumiem un 192 dzēšanām
  1. 175 113
      code/Attack/MembersMgmtCommAttack.py
  2. 157 79
      code/ID2TLib/CommunicationProcessor.py

+ 175 - 113
code/Attack/MembersMgmtCommAttack.py

@@ -12,7 +12,20 @@ class MessageType(Enum):
     SALITY_HELLO_REPLY = 104
 
 class Message():
-    def __init__(self, msg_id: int, src: str, dst: str, type_: MessageType, time: float, refer_msg_id: int=-1):
+    """
+    Defines a compact message type that contains all necessary information.
+    """
+    def __init__(self, msg_id: int, src, dst, type_: MessageType, time: float, refer_msg_id: int=-1):
+        """
+        Constructs a message with the given parameters.
+
+        :param msg_id: the ID of the message
+        :param src: something identifying the source, e.g. ID or configuration
+        :param dst: something identifying the destination, e.g. ID or configuration
+        :param type_: the type of the message
+        :param time: the timestamp of the message
+        :param refer_msg_id: the ID this message is a request for or reply to. -1 if there is no related message.
+        """
         self.msg_id = msg_id
         self.src = src
         self.dst = dst
@@ -124,15 +137,110 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
 
         
     def generate_attack_pcap(self):
-        def add_ids_to_config(ids_to_add, existing_ips, new_ips, bot_configs, idtype="local", router_mac=""):
+        # create the final messages that have to be sent, including all bot configurations
+        messages = self._create_messages()
+
+        # for msg in messages:
+        #     print(msg)
+
+        # Setup (initial) parameters for packet creation loop
+        BUFFER_SIZE = 1000
+        pkt_gen = PacketGenerator()
+        file_timestamp_prv = messages[0].time
+        pcap_timestamp = self.get_param_value(Param.INJECT_AT_TIMESTAMP)
+        padding = self.get_param_value(Param.PACKET_PADDING)
+        packets = deque(maxlen=BUFFER_SIZE)
+        total_pkts = 0
+        limit_packetcount = self.get_param_value(Param.PACKETS_LIMIT)
+        limit_duration = self.get_param_value(Param.ATTACK_DURATION)
+        duration = 0
+        path_attack_pcap = None
+
+        # create packets to write to PCAP file
+        for msg in messages:
+            # retrieve the source and destination configurations
+            id_src, id_dst = msg.src["ID"], msg.dst["ID"]
+            ip_src, ip_dst = msg.src["IP"], msg.dst["IP"]
+            mac_src, mac_dst = msg.src["MAC"], msg.dst["MAC"]
+            port_src, port_dst = msg.src["Port"], msg.dst["Port"]
+            ttl = msg.src["TTL"]
+
+            # update timestamps and duration
+            file_timestamp = msg.time
+            file_time_delta = file_timestamp - file_timestamp_prv
+            pcap_timestamp += file_time_delta
+            duration += file_time_delta
+            file_timestamp_prv = file_timestamp
+
+            # if total number of packets has been sent or the attack duration has been exceeded, stop
+            if ((limit_packetcount is not None and total_pkts >= limit_packetcount) or 
+                    (limit_duration is not None and duration >= limit_duration)):
+                break
+        
+            # if the type of the message is a NL reply, determine the number of entries
+            nl_size = 0     
+            if msg.type == MessageType.SALITY_NL_REPLY:
+                nl_size = randint(1, 25)    # what is max NL entries? 
+
+            # create suitable IP/UDP packet and add to packets list
+            packet = pkt_gen.generate_mmcom_packet(ip_src=ip_src, ip_dst=ip_dst, ttl=ttl, mac_src=mac_src, mac_dst=mac_dst, 
+                port_src=port_src, port_dst=port_dst, message_type=msg.type, neighborlist_entries=nl_size)
+            PaddingGenerator.add_padding(packet, padding)
+
+            packet.time = pcap_timestamp
+            packets.append(packet)
+            total_pkts += 1
+
+            # Store timestamp of first packet (for attack label)
+            if total_pkts <= 1:
+                self.attack_start_utime = packets[0].time
+            elif total_pkts % BUFFER_SIZE == 0: # every 1000 packets write them to the PCAP file (append)
+                packets = list(packets)
+                PaddingGenerator.equal_length(packets)
+                last_packet = packets[-1]
+                path_attack_pcap = self.write_attack_pcap(packets, True, path_attack_pcap)
+                packets = deque(maxlen=BUFFER_SIZE)
+
+        # if there are unwritten packets remaining, write them to the PCAP file
+        if len(packets) > 0:
+            packets = list(packets)
+            PaddingGenerator.equal_length(packets)
+            path_attack_pcap = self.write_attack_pcap(packets, True, path_attack_pcap)
+            last_packet = packets[-1]
+
+        # Store timestamp of last packet
+        self.attack_end_utime = last_packet.time
+
+        # Return the total number of packets (sent) and the path of the written attack PCAP file
+        return total_pkts , path_attack_pcap
+
+
+    def _create_messages(self):
+        def add_ids_to_config(ids_to_add: list, existing_ips: list, new_ips: list, bot_configs: dict, idtype:str="local", router_mac:str=""):
+            """
+            Creates IP and MAC configurations for the given IDs and adds them to the existing configurations object.
+
+            :param ids_to_add: all sorted IDs that have to be configured and added
+            :param existing_ips: the existing IPs in the PCAP file that should be assigned to some, or all, IDs
+            :param new_ips: the newly generated IPs that should be assigned to some, or all, IDs
+            :param bot_configs: the existing configurations for the bots
+            :param idtype: the locality type of the IDs
+            :param router_mac: the MAC address of the router in the PCAP 
+            """
+
             ids = ids_to_add.copy()
+            # macgen is only needed when the IPs are new local IPs; creating the object here therefore suffices
+            # for the current callers to not end up with the same MAC paired with different IPs
             macgen = MacAddressGenerator()
+
+            # assign existing IPs and the corresponding MAC addresses in the PCAP to the IDs
             for ip in existing_ips:
                 random_id = choice(ids)
                 mac = self.statistics.process_db_query("macAddress(IPAddress=%s)" % ip)
                 bot_configs[random_id] = {"Type": idtype, "IP": ip, "MAC": mac}
                 ids.remove(random_id)
 
+            # assign new IPs and for local IPs new MACs or for external IPs the router MAC to the IDs
             for ip in new_ips:
                 random_id = choice(ids)
                 if idtype == "local":
@@ -143,22 +251,43 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
                 ids.remove(random_id)
 
         def index_increment(number: int, max: int):
+            """
+            Number increment with rollover.
+            """
             if number + 1 < max:
                 return number + 1
             else:
                 return 0
 
+        def assign_realistic_ttls(bot_configs):
+            # Gamma distribution parameters derived from MAWI 13.8G dataset
+            ids = sorted(bot_configs.keys())
+            alpha, loc, beta = (2.3261710235, -0.188306914406, 44.4853123884)
+            gd = gamma.rvs(alpha, loc=loc, scale=beta, size=len(ids))
+
+            for pos, bot in enumerate(ids):
+                # print(bot)
+                is_invalid = True
+                pos_max = len(gd)
+                while is_invalid:
+                    ttl = int(round(gd[pos]))
+                    if 0 < ttl < 256:  # validity check
+                        is_invalid = False
+                    else:
+                        pos = index_increment(pos, pos_max)
+                bot_configs[bot]["TTL"] = ttl
+
         # parse input CSV or XML
         filepath_xml = self.get_param_value(Param.FILE_XML)
         filepath_csv = self.get_param_value(Param.FILE_CSV)
 
-        # prefer XML input over CSV input
+        # prefer XML input over CSV input (in case both are given)
         if filepath_csv and filepath_xml == self.DEFAULT_XML_PATH:
             filepath_xml = FileUtils.parse_csv_to_xml(filepath_csv) 
 
         abstract_packets = FileUtils.parse_xml(filepath_xml)
 
-        # find a good communication mapping
+        # find a good communication mapping in the input file that matches the user's parameters
         duration = self.get_param_value(Param.ATTACK_DURATION)
         number_bots = self.get_param_value(Param.NUMBER_BOTS)
         comm_proc = CommunicationProcessor(abstract_packets)
@@ -168,50 +297,44 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
             print("Error: There is no interval in the given CSV/XML that has enough communication")
             return 0, None
 
+        # retrieve the mapping information
         mapped_ids, id_comms, packet_start_idx, packet_end_idx = comm_interval["IDs"], comm_interval["Comms"], comm_interval["Start"], comm_interval["End"]
-        # print(mapped_ids)
+        # assign the communication processor this mapping for further processing
         comm_proc.set_mapping(abstract_packets[packet_start_idx:packet_end_idx+1], mapped_ids, id_comms)
-        # print(mapped_ids)
+
         # print start and end time of mapped interval
         # print(abstract_packets[packet_start_idx]["Time"])
         # print(abstract_packets[packet_end_idx]["Time"])
 
-        # determine amount of reused IPs
+        # determine number of reused local and external IPs
         reuse_percent_total = self.get_param_value(Param.IP_REUSE_TOTAL)
         reuse_percent_external = self.get_param_value(Param.IP_REUSE_EXTERNAL)
         reuse_percent_local = self.get_param_value(Param.IP_REUSE_LOCAL)
-
         reuse_count_external = int(reuse_percent_total * reuse_percent_external * len(mapped_ids))
         reuse_count_local = int(reuse_percent_total * reuse_percent_local * len(mapped_ids))
 
-        # create bot IP and MAC configs
+        # create locality, IP and MAC configurations for the IDs/Bots
         ipgen = IPGenerator()
         comm_type = self.get_param_value(Param.COMM_TYPE)
         pcapops = PcapAddressOperations(self.statistics)
         router_mac = pcapops.get_probable_router_mac()
         bot_configs = {}
+        # determine the roles of the IDs in the mapping communication-{initiator, responder} or both
         init_ids, respnd_ids, both_ids, messages = comm_proc.det_id_roles_and_msgs(self.msg_types)
+        # use these roles to determine which IDs are to be local and which external
         local_ids, external_ids = comm_proc.det_ext_and_local_ids(comm_type, self.PROB_INIT_IS_LOCAL, self.PROB_RESPND_IS_LOCAL)
-        # print(external_ids)
-
-        # for msg in messages:
-        #     print(msg)
-
-        # print(sorted(list(init_ids)+list(both_ids)))
-        # print(sorted(local_ids))
-        # print(sorted(external_ids))
-
-        #### Set realistic timestamps for messages ####
-        #### ... ####
-
-        # IDs are always added to bot_configs in the same order under a given seed
+       
+        # retrieve and assign the IPs and MACs for the bots with respect to the given parameters
+        # (IDs are always added to bot_configs in the same order under a given seed)
         number_local_ids, number_external_ids = len(local_ids), len(external_ids)
+        # assign addresses for local IDs
         if number_local_ids > 0:
             reuse_count_local = int(reuse_percent_total * reuse_percent_local * number_local_ids) 
             existing_local_ips = sorted(pcapops.get_existing_priv_ips(reuse_count_local))
             new_local_ips = sorted(pcapops.get_new_priv_ips(number_local_ids - len(existing_local_ips)))
             add_ids_to_config(sorted(local_ids), existing_local_ips, new_local_ips, bot_configs)
 
+        # assign addresses for external IDs
         if number_external_ids > 0:
             reuse_count_external = int(reuse_percent_total * reuse_percent_external * number_external_ids) 
             existing_external_ips = sorted(pcapops.get_existing_external_ips(reuse_count_external))
@@ -219,112 +342,51 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
             new_external_ips = sorted([ipgen.random_ip() for _ in range(remaining)])
             add_ids_to_config(sorted(external_ids), existing_external_ips, new_external_ips, bot_configs, idtype="external", router_mac=router_mac)
 
+        # for msg in messages:
+        #     print(msg)
+        # print(sorted(list(init_ids)+list(both_ids)))
+        # print(sorted(local_ids))
+        # print(sorted(external_ids))
+
 
-        # create bot port configs
+        #### Set realistic timestamps for messages ####
+        #### ... ####
+        
+
+        # create port configurations for the bots
         for bot in bot_configs:
             bot_configs[bot]["Port"] = gen_random_server_port()    
-        # print(bot_configs)
-
-        # create realistic ttl for every bot
-        # Gamma distribution parameters derived from MAWI 13.8G dataset
-        ids = bot_configs.keys()
-        alpha, loc, beta = (2.3261710235, -0.188306914406, 44.4853123884)
-        gd = gamma.rvs(alpha, loc=loc, scale=beta, size=len(ids))
-
-        for pos, bot in enumerate(bot_configs):
-            is_invalid = True
-            pos_max = len(gd)
-            while is_invalid:
-                ttl = int(round(gd[pos]))
-                if 0 < ttl < 256:  # validity check
-                    is_invalid = False
-                else:
-                    pos = index_increment(pos, pos_max)
-            bot_configs[bot]["TTL"] = ttl
-
-        # Setup initial parameters for packet creation
-        BUFFER_SIZE = 1000
-        pkt_gen = PacketGenerator()
-        file_timestamp_prv = float(abstract_packets[packet_start_idx]["Time"])
-        pcap_timestamp = self.get_param_value(Param.INJECT_AT_TIMESTAMP)
-        padding = self.get_param_value(Param.PACKET_PADDING)
-        packets = deque(maxlen=BUFFER_SIZE)
-        total_pkts = 0
-        limit_packetcount = self.get_param_value(Param.PACKETS_LIMIT)
-        limit_duration = duration
-        duration = 0
-        path_attack_pcap = None
-        nl_requests = {}
 
-        # create packets to write to pcap file
-        for abst_packet in abstract_packets[packet_start_idx:packet_end_idx+1]:
-            # map/retrieve addresses to ids from input file
-            id_src, id_dst = abst_packet["Src"], abst_packet["Dst"]
-            if (not id_src in bot_configs) or (not id_dst in bot_configs):
-                continue
-
-            ip_src, ip_dst = bot_configs[id_src]["IP"], bot_configs[id_dst]["IP"]
-            mac_src, mac_dst = bot_configs[id_src]["MAC"], bot_configs[id_dst]["MAC"]
-            port_src, port_dst = bot_configs[id_src]["Port"], bot_configs[id_dst]["Port"]
-            ttl = bot_configs[id_src]["TTL"]
-            type_src, type_dst = bot_configs[id_src]["Type"], bot_configs[id_dst]["Type"]
+        # assign realistic TTL for every bot
+        assign_realistic_ttls(bot_configs)
 
-            # print("{0} --> {1}, {2} - {3}".format(id_src, id_dst, type_src, type_dst))
+        # put together the final messages including the full sender and receiver
+        # configurations (i.e. IP, MAC, port, ...) for easier later use
+        final_messages = []
+        new_id = 0
+        for msg in messages:
+            print(msg)
+            print(mapped_ids.keys())
+            type_src, type_dst = bot_configs[msg.src]["Type"], bot_configs[msg.dst]["Type"]
+            id_src, id_dst = msg.src, msg.dst
 
+            # sort out messages that do not have a suitable locality setting
             if type_src == "external" and type_dst == "external":
                 continue
             if comm_type == "external":
                 if type_src == "local" and type_dst == "local":
                     continue
 
+            msg.src, msg.dst = bot_configs[id_src], bot_configs[id_dst]
+            msg.src["ID"], msg.dst["ID"] = id_src, id_dst
+            msg.msg_id = new_id
+            new_id += 1
+            ### TODO: is it important to also update the refer_msg_id references here, i.e. are they needed later?
+            final_messages.append(msg)
 
-            # update timestamps and duration
-            file_timestamp = float(abst_packet["Time"])
-            file_time_delta = file_timestamp - file_timestamp_prv
-            pcap_timestamp += file_time_delta
-            duration += file_time_delta
-            file_timestamp_prv = file_timestamp
-
-            # if total number of packets has been sent or the attack duration has been exceeded, stop
-            if ((limit_packetcount is not None and total_pkts >= limit_packetcount) or 
-                    (limit_duration is not None and duration >= limit_duration)):
-                break
-        
-            # create ip packet and add to packets list
-            message_type = self.msg_types[int(abst_packet["Type"])]
-            nl_size = 0
-
-            if message_type == MessageType.SALITY_NL_REPLY:
-                nl_size = randint(1, 25)
-            elif message_type == MessageType.TIMEOUT:
-                continue
+        return final_messages
 
-            packet = pkt_gen.generate_mmcom_packet(ip_src=ip_src, ip_dst=ip_dst, ttl=ttl, mac_src=mac_src, mac_dst=mac_dst, 
-                port_src=port_src, port_dst=port_dst, message_type=message_type, neighborlist_entries=nl_size)
-            PaddingGenerator.add_padding(packet, padding)
 
-            packet.time = pcap_timestamp
-            packets.append(packet)
-            total_pkts += 1
 
-            # Store timestamp of first packet (for attack label)
-            if total_pkts <= 1:
-                self.attack_start_utime = packets[0].time
-            elif total_pkts % BUFFER_SIZE == 0: # every 1000 packets write them to the pcap file (append)
-                packets = list(packets)
-                PaddingGenerator.equal_length(packets)
-                last_packet = packets[-1]
-                path_attack_pcap = self.write_attack_pcap(packets, True, path_attack_pcap)
-                packets = deque(maxlen=BUFFER_SIZE)
-
-        if len(packets) > 0:
-            packets = list(packets)
-            PaddingGenerator.equal_length(packets)
-            path_attack_pcap = self.write_attack_pcap(packets, True, path_attack_pcap)
-            last_packet = packets[-1]
 
-        # Store timestamp of last packet
-        self.attack_end_utime = last_packet.time
 
-        # Return packets sorted by packet by timestamp and total number of packets (sent)
-        return total_pkts , path_attack_pcap

+ 157 - 79
code/ID2TLib/CommunicationProcessor.py

@@ -6,11 +6,21 @@ from Attack.MembersMgmtCommAttack import Message
 EPS_TOLERANCE = 1e-13  # works for a difference of 0.1, no less
 
 class CommunicationProcessor():
+    """
+    Class to process parsed input CSV/XML data and retrieve a mapping or other information.
+    """
 
     def __init__(self, packets):
         self.packets = packets
 
     def set_mapping(self, packets, mapped_ids, id_comms):
+        """
+        Set the selected mapping for this communication processor.
+
+        :param packets: all packets contained in the mapped time frame
+        :param mapped_ids: the chosen IDs
+        :param id_comms: the communications between the mapped IDs within the mapped interval
+        """
         self.packets = packets
         self.ids = mapped_ids.keys()
         self.id_comms = id_comms
@@ -18,12 +28,12 @@ class CommunicationProcessor():
 
     def find_interval_with_most_comm(self, number_ids: int, max_int_time: float):
         """
-        Finds a time interval of the given seconds where the given number of ids communicate among themselves the most.
+        Finds a time interval of the given seconds where the given number of IDs communicate among themselves the most.
         
         :param packets: The packets containing the communication
-        :param number_ids: The number of ids that are to be considered
+        :param number_ids: The number of IDs that are to be considered
         :param max_int_time: The maximum length of the time interval in seconds
-        :return: A triple consisting of the ids, as well as start and end idx with respect to the given packets. 
+        :return: A triple consisting of the IDs, as well as start and end idx with respect to the given packets. 
         """
         packets = self.packets
 
@@ -75,9 +85,9 @@ class CommunicationProcessor():
 
         def get_msg_count_first_ids(msg_counts: list):
             """
-            Finds the ids that communicate among themselves the most with respect to the given message counts.
+            Finds the IDs that communicate among themselves the most with respect to the given message counts.
             :param msg_counts: a sorted list of message counts where each entry is a tuple of key and value
-            :return: The picked ids and their total message count as a tuple
+            :return: The picked IDs and their total message count as a tuple
             """
             # if order of most messages is important, use an additional list
             picked_ids = set()
@@ -88,18 +98,18 @@ class CommunicationProcessor():
                 count_picked_ids = len(picked_ids)
                 id_one, id_two = msg[0].split("-")
 
-                # if enough ids have been found, stop
+                # if enough IDs have been found, stop
                 if count_picked_ids >= number_ids:
                     break
 
-                # if two ids can be added without exceeding the desired number of ids, add them
+                # if two IDs can be added without exceeding the desired number of IDs, add them
                 if count_picked_ids - 2 <= number_ids:
                     picked_ids.add(id_one)
                     picked_ids.add(id_two)
                     total_msg_count += msg[1]
 
                 # if there is only room for one more id to be added, 
-                # find one that is already contained in the picked ids
+                # find one that is already contained in the picked IDs
                 else:
                     for j, msg in enumerate(msg_counts[i:]):
                         id_one, id_two = msg[0].split("-")
@@ -155,7 +165,7 @@ class CommunicationProcessor():
                 # get all message counts for communications that took place in the current interval
                 nez_msg_counts = get_nez_msg_counts(msg_counts)
 
-                # if we have enough ids as specified by the caller, mark as possible interval
+                # if we have enough IDs as specified by the caller, mark as possible interval
                 if count_ids_in_msg_counts(nez_msg_counts) >= number_ids:
                     # possible_intervals.append((nez_msg_counts, packets[idx_low]["Time"], packets[idx_high-1]["Time"]))
                     possible_intervals.append((nez_msg_counts, idx_low, idx_high - 1))
@@ -174,13 +184,14 @@ class CommunicationProcessor():
             idx_high += 1
 
 
-        # now find the interval in which as many ids as specified communicate the most in the given time interval
+        # now find the interval in which as many IDs as specified communicate the most in the given time interval
         summed_intervals = []
         sum_intervals_idxs = []
         cur_highest_sum = 0
 
-        # for every interval compute the sum of msg_counts of the first most communicative ids and eventually find
-        # the interval(s) with most communication and its ids
+        # for every interval compute the sum of msg_counts of the first most communicative IDs and eventually find
+        # the interval(s) with most communication and its IDs
+        # on the side also store the communication count of the individual IDs
         for j, interval in enumerate(possible_intervals):
             msg_counts = interval[0].items()
             sorted_msg_counts = sorted(msg_counts, key=lambda x: x[1], reverse=True)
@@ -204,85 +215,30 @@ class CommunicationProcessor():
 
         return summed_intervals
 
-    def det_ext_and_local_ids(self, comm_type: str, prob_init_local: int, prob_rspnd_local: int):
-        init_ids, respnd_ids, both_ids = self.init_ids, self.respnd_ids, self.both_ids
-        id_comms = self.id_comms
-        external_ids = set()
-        local_ids = set()
-
-        def map_init_is_local(id_: int):
-            for id_comm in id_comms:
-                ids = id_comm.split("-")
-                other = ids[0] if id_ == ids[1] else ids[1] 
-                
-                # what if before other was external ...
-                if other in local_ids or other in external_ids:
-                    continue 
-
-                if comm_type == "mixed":
-                    other_pos = mixed_respnd_is_local.random()
-                    if other_pos == "local":
-                        local_ids.add(other)
-                    elif other_pos == "external":
-                        external_ids.add(other)
-                elif comm_type == "external":
-                    if not other in initiators:
-                        external_ids.add(other)
-
-        def map_init_is_external(id_: int):
-            for id_comm in id_comms:
-                ids = id_comm.split("-")
-                other = ids[0] if id_ == ids[1] else ids[1] 
-                
-                # what if before other was external ...
-                if other in local_ids or other in external_ids:
-                    continue 
-                if not other in initiators:
-                    local_ids.add(other)
-
-
-        if comm_type == "local":
-                local_ids = set(mapped_ids.keys())
-        else:
-            init_local_or_external = Lea.fromValFreqsDict({"local": prob_init_local*100, "external": (1-prob_init_local)*100})
-            mixed_respnd_is_local = Lea.fromValFreqsDict({"local": prob_rspnd_local*100, "external": (1-prob_rspnd_local)*100})
-
-            # assign IDs in 'both' local everytime for mixed? 
-            initiators = sorted(list(init_ids) + list(both_ids))
-            initiators = sorted(initiators, key=lambda id_:self.indv_id_counts[id_], reverse=True)
-
-            for id_ in initiators:
-                pos = init_local_or_external.random()          
-                if pos == "local":
-                    if id_ in external_ids:
-                        map_init_is_external(id_)
-                    else:
-                        local_ids.add(id_)
-                        map_init_is_local(id_)
-                elif pos == "external":
-                    if id_ in local_ids:
-                        map_init_is_local(id_)
-                    else:
-                        external_ids.add(id_)
-                        map_init_is_external(id_)
-
-        self.local_ids, self.external_ids = local_ids, external_ids
-        return local_ids, external_ids
 
     def det_id_roles_and_msgs(self, mtypes: dict):
         """
         Determine the role of every mapped ID. The role can be initiator, responder or both.
-        :param packets: the mapped section of abstract packets
-        :param all_ids: all IDs that were mapped/chosen
-        :return: a dict that for every ID contains its role
+        On the side also connect corresponding messages together to quickly find out
+        which reply belongs to which request and vice versa.
+
+        :param mtypes: a dict for fast number to enum type lookup of message types
+        :return: a 4-tuple as (initiator IDs, responder IDs, both IDs, messages)
         """
+
+        # setup initial variables and their values
         init_ids, respnd_ids, both_ids = set(), set(), set()
+        # msgs --> the filtered messages, msg_id --> an increasing ID to give every message an artificial primary key
         msgs, msg_id = [], 0
+        # keep track of previous requests to find connections
         prev_reqs = {}
         all_ids = self.ids
         packets = self.packets
 
         def process_initiator(id_: str):
+            """
+            Process the given ID as initiator and update the above sets accordingly.
+            """
             if id_ in both_ids:
                 pass
             elif not id_ in respnd_ids:
@@ -292,6 +248,9 @@ class CommunicationProcessor():
                 both_ids.add(id_)
 
         def process_responder(id_: str):
+            """
+            Process the given ID as responder and update the above sets accordingly.
+            """
             if id_ in both_ids:
                 pass
             elif not id_ in init_ids:
@@ -300,34 +259,153 @@ class CommunicationProcessor():
                 init_ids.remove(id_)
                 both_ids.add(id_)
 
+        # process every packet individually 
         for packet in packets:
             id_src, id_dst, msg_type, time = packet["Src"], packet["Dst"], int(packet["Type"]), float(packet["Time"])
+            # if either one of the IDs is not mapped, continue
             if (not id_src in all_ids) or (not id_dst in all_ids):
                 continue
+
+            # convert message type number to enum type
             msg_type = mtypes[msg_type]
+
+            # process a request
             if msg_type in {MessageType.SALITY_HELLO, MessageType.SALITY_NL_REQUEST}:
+                # process each ID's role
                 process_initiator(id_src)
                 process_responder(id_dst)
+                # convert the abstract message into a message object to handle it better
                 msg_str = "{0}-{1}".format(id_src, id_dst)
                 msg = Message(msg_id, id_src, id_dst, msg_type, time)
                 msgs.append(msg)
                 prev_reqs[msg_str] = msg_id
 
+            # process a reply
             elif msg_type in {MessageType.SALITY_HELLO_REPLY, MessageType.SALITY_NL_REPLY}:
+                # process each ID's role
                 process_initiator(id_dst)
                 process_responder(id_src)
+                # convert the abstract message into a message object to handle it better
                 msg_str = "{0}-{1}".format(id_dst, id_src)
+                # find the request message ID for this response and set its reference index
                 refer_idx = prev_reqs[msg_str]
                 msgs[refer_idx].refer_msg_id = msg_id
                 # print(msgs[refer_idx])
                 msg = Message(msg_id, id_src, id_dst, msg_type, time, refer_idx)
                 msgs.append(msg)
+                # remove the request to this response from storage
                 del(prev_reqs[msg_str])
 
+            # for message ID only count actual messages
             if not msg_type == MessageType.TIMEOUT:
                 msg_id += 1
 
+        # store the retrieved information in this object for later use
         self.init_ids, self.respnd_ids, self.both_ids = init_ids, respnd_ids, both_ids
         self.messages = msgs
+
+        # return the retrieved information
         return init_ids, respnd_ids, both_ids, msgs
 
+
+    def det_ext_and_local_ids(self, comm_type: str, prob_init_local: float, prob_rspnd_local: float):
+        """
+        Map the given IDs to a locality (i.e. local or external) considering the given probabilities.
+        The result is also stored in self.local_ids and self.external_ids.
+
+        :param comm_type: the type of communication (i.e. local, external or mixed)
+        :param prob_init_local: the probability (between 0 and 1) that an initiator ID is local
+        :param prob_rspnd_local: the probability (between 0 and 1) that a responder ID is local,
+                                 only used if comm_type is mixed
+        :return: a 2-tuple as (local IDs, external IDs)
+        """
+        init_ids, both_ids = self.init_ids, self.both_ids
+        id_comms = self.id_comms
+        external_ids = set()
+        local_ids = set()
+
+        def unmapped_partners(id_: str):
+            """
+            Lazily yield every communication partner of the given ID that has no locality yet.
+            """
+            # loop over all communication entries
+            for id_comm in id_comms:
+                # split first: both checks below need the two IDs of the entry
+                ids = id_comm.split("-")
+
+                # if id_comm does not contain the ID to be mapped, continue
+                if id_ not in (ids[0], ids[1]):
+                    continue
+
+                other = ids[0] if id_ == ids[1] else ids[1]
+
+                # if other is already mapped, continue
+                if other in local_ids or other in external_ids:
+                    continue
+
+                yield other
+
+        def map_init_is_local(id_: str):
+            """
+            Handle the locality of the communication partners of the given ID, which is treated as local.
+            The ID itself is added to its set by the caller.
+            """
+            for other in unmapped_partners(id_):
+                # if comm_type is mixed, other ID can be local or external
+                if comm_type == "mixed":
+                    other_pos = mixed_respnd_is_local.random()
+                    if other_pos == "local":
+                        local_ids.add(other)
+                    elif other_pos == "external":
+                        external_ids.add(other)
+
+                # if comm_type is external, other ID must be external to fulfill type
+                # exclude initiators not to throw away too much communication
+                elif comm_type == "external":
+                    if other not in initiator_set:
+                        external_ids.add(other)
+
+        def map_init_is_external(id_: str):
+            """
+            Handle the locality of the communication partners of the given ID, which is treated as external.
+            The ID itself is added to its set by the caller.
+            """
+            for other in unmapped_partners(id_):
+                # partners of an external ID become local, initiators are mapped in the main loop
+                if other not in initiator_set:
+                    local_ids.add(other)
+
+        # if comm_type is local, map all IDs to local
+        if comm_type == "local":
+            # NOTE(review): assumes self.ids holds all mapped IDs (it is what det_id_roles_and_msgs
+            # filters packets with); the previous name 'mapped_ids' was undefined here - confirm
+            local_ids = set(self.ids)
+        else:
+            # set up probabilistic chooser
+            init_local_or_external = Lea.fromValFreqsDict({"local": prob_init_local*100, "external": (1-prob_init_local)*100})
+            mixed_respnd_is_local = Lea.fromValFreqsDict({"local": prob_rspnd_local*100, "external": (1-prob_rspnd_local)*100})
+
+            # assign IDs in 'both' local everytime for mixed?
+            # sort initiators by some order, to gain determinism
+            initiators = sorted(list(init_ids) + list(both_ids))
+            # sort by individual communication count to increase final communication count
+            # better to sort by highest count of 'shared' IDs in case of local comm_type?
+            initiators = sorted(initiators, key=lambda id_: self.indv_id_counts[id_], reverse=True)
+            # set for fast membership tests in the helper functions above
+            initiator_set = set(initiators)
+
+            for id_ in initiators:
+                pos = init_local_or_external.random()
+                if pos == "local":
+                    # if id_ has already been mapped differently, its communication partners still have to be mapped
+                    if id_ in external_ids:
+                        map_init_is_external(id_)
+                    # otherwise, map as chosen above
+                    else:
+                        local_ids.add(id_)
+                        map_init_is_local(id_)
+                elif pos == "external":
+                    # if id_ has already been mapped differently, its communication partners still have to be mapped
+                    if id_ in local_ids:
+                        map_init_is_local(id_)
+                    # otherwise, map as chosen above
+                    else:
+                        external_ids.add(id_)
+                        map_init_is_external(id_)
+
+        # store the retrieved information in this object for later use
+        self.local_ids, self.external_ids = local_ids, external_ids
+        return local_ids, external_ids
+