Browse Source

First files from the botnet-focused-id2t moved to the other id2t

Denis Waßmann 6 năm trước cách đây
mục cha
commit
7a4cc6e320

+ 23 - 0
code/Attack/AttackParameters.py

@@ -45,6 +45,25 @@ class Parameter(enum.Enum):
     CUSTOM_PAYLOAD = 'custom.payload'  # custom payload for ftp exploits
     CUSTOM_PAYLOAD_FILE = 'custom.payload.file'  # file that contains custom payload for ftp exploits
 
+    NAT_PRESENT = 'nat.present'  # if NAT is active, external computers cannot initiate a communication in MembersMgmtCommAttack
+    TTL_FROM_CAIDA = 'ttl.from.caida'  # if True, TTLs are assigned based on the TTL distributions from the CAIDA dataset
+
+    NUMBER_INITIATOR_BOTS = 'bots.count'
+    # recommended type: Filepath ------------------------------------
+    FILE_CSV = 'file.csv'  # filepath to CSV containing a communication pattern
+    FILE_XML = 'file.xml'  # filepath to XML containing a communication pattern
+    # recommended type: CommType ------------------------------------
+    COMM_TYPE = "comm.type"  # the locality of bots in botnet communication (e.g. local, external, mixed)
+    # recommended type: Percentage (0.0-1.0) ------------------------------------
+    IP_REUSE_TOTAL = 'ip.reuse.total'  # percentage of IPs in original PCAP to be reused
+    IP_REUSE_LOCAL = 'ip.reuse.local'  # percentage of private IPs in original PCAP to be reused
+    IP_REUSE_EXTERNAL = 'ip.reuse.external'  # percentage of public IPs in original PCAP to be reused
+    # recommended type: Positive Integer between 0 and 100 ------------------------------------
+    PACKET_PADDING = 'packet.padding'
+    # calculate the destination port based on the hostname (like some botnets do)
+    # otherwise the destination port is a normal ephemeral port
+    BOTNET_DST_PORT_CALCULATION = "botnet.dstportcalculation"
+
 
 class ParameterTypes(enum.Enum):
     """
@@ -61,3 +80,7 @@ class ParameterTypes(enum.Enum):
     TYPE_PACKET_POSITION = 7  # used to derive timestamp from parameter INJECT_AFTER_PACKET
     TYPE_DOMAIN = 8
     TYPE_STRING = 9
+    TYPE_FILEPATH = 10
+    TYPE_COMM_TYPE = 11
+    TYPE_PERCENTAGE = 12
+    TYPE_PADDING = 13

+ 65 - 0
code/Attack/BaseAttack.py

@@ -281,6 +281,51 @@ class BaseAttack(metaclass=abc.ABCMeta):
         domain = re.match(r'^(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$', val)
         return domain is not None
 
+    @staticmethod
+    def _is_filepath(val: str):
+        """
+        Checks whether the given path refers to an existing file.
+
+        :param val: the path to check, as a string
+        :return: True if os.path.isfile reports a file at that location, otherwise False
+        """
+        file_exists = os.path.isfile(val)
+        return file_exists
+
+    @staticmethod
+    def _is_comm_type(val: str):
+        """
+        Checks whether the given string names a supported communication type.
+
+        :param val: the communication type as a string
+        :return: True if val is one of "local", "external" or "mixed", otherwise False
+        """
+        return val in {"local", "external", "mixed"}
+
+    @staticmethod
+    def _is_percentage(val: float):
+        """
+        Checks whether the given number is a valid percentage, i.e. lies in the closed interval [0, 1].
+
+        :param val: the number to test
+        :return: True if 0 <= val <= 1, otherwise False
+        """
+        return 0 <= val <= 1
+
+    @staticmethod
+    def _is_padding(val: int):
+        """
+        Checks whether the given number is a valid padding size, i.e. lies in the closed interval [0, 100].
+
+        :param val: the padding size to test
+        :return: True if 0 <= val <= 100, otherwise False
+        """
+        return 0 <= val <= 100
+
     #########################################
     # HELPER METHODS
     #########################################
@@ -388,6 +433,26 @@ class BaseAttack(metaclass=abc.ABCMeta):
                 value = (ts / 1000000)  # convert microseconds from getTimestampMuSec into seconds
         elif param_type == atkParam.ParameterTypes.TYPE_DOMAIN:
             is_valid = self._is_domain(value)
+        elif param_type == atkParam.ParameterTypes.TYPE_FILEPATH:
+            is_valid = self._is_filepath(value)
+        elif param_type == atkParam.ParameterTypes.TYPE_COMM_TYPE:
+            is_valid = self._is_comm_type(value)
+        elif param_type == atkParam.ParameterTypes.TYPE_PERCENTAGE:
+            is_valid, value = self._is_float(value)
+            if is_valid and (
+                    param_name in {atkParam.Parameter.IP_REUSE_TOTAL, atkParam.Parameter.IP_REUSE_LOCAL, atkParam.Parameter.IP_REUSE_EXTERNAL}):
+                is_valid = self._is_percentage(value)
+            else:
+                is_valid = False
+        elif param_type == atkParam.ParameterTypes.TYPE_PADDING:
+            if isinstance(value, int):
+                is_valid = True
+            elif isinstance(value, str) and value.isdigit():
+                is_valid = True
+                value = int(value)
+
+            if is_valid:
+                is_valid = self._is_padding(value)
 
         # add value iff validation was successful
         if is_valid:

+ 606 - 0
code/Attack/MembersMgmtCommAttack.py

@@ -0,0 +1,606 @@
+from enum import Enum
+from random import randint, randrange, choice, uniform
+from collections import deque
+from scipy.stats import gamma
+from lea import Lea
+from datetime import datetime
+import os
+
+from Attack import BaseAttack
+from Attack.AttackParameters import Parameter as Param
+from Attack.AttackParameters import ParameterTypes
+# from ID2TLib import PcapFile
+# from ID2TLib.PcapFile import PcapFile
+from ID2TLib.Ports import PortSelectors
+
+class MessageType(Enum):
+    """
+    Enumerates the botnet message types known to this attack.
+    """
+
+    TIMEOUT = 3
+    SALITY_NL_REQUEST = 101
+    SALITY_NL_REPLY = 102
+    SALITY_HELLO = 103
+    SALITY_HELLO_REPLY = 104
+
+    def is_request(mtype):
+        """
+        :return: True if this type is SALITY_HELLO or SALITY_NL_REQUEST, otherwise False
+        """
+        request_types = {MessageType.SALITY_HELLO, MessageType.SALITY_NL_REQUEST}
+        return mtype in request_types
+
+    def is_response(mtype):
+        """
+        :return: True if this type is SALITY_HELLO_REPLY or SALITY_NL_REPLY, otherwise False
+        """
+        response_types = {MessageType.SALITY_HELLO_REPLY, MessageType.SALITY_NL_REPLY}
+        return mtype in response_types
+
+
+class Message():
+    """
+    Defines a compact message type that contains all necessary information.
+    """
+
+    # default value of line_no, used when no line number is given
+    INVALID_LINENO = -1
+
+    def __init__(self, msg_id: int, src, dst, type_: MessageType, time: float, refer_msg_id: int=-1, line_no = INVALID_LINENO):
+        """
+        Constructs a message with the given parameters.
+
+        :param msg_id: the ID of the message
+        :param src: something identifying the source, e.g. ID or configuration
+        :param dst: something identifying the destination, e.g. ID or configuration
+        :param type_: the type of the message
+        :param time: the timestamp of the message
+        :param refer_msg_id: the ID this message is a request for or reply to. -1 if there is no related message.
+        :param line_no: The line number this message appeared in the original file (INVALID_LINENO if not given)
+        """
+        self.msg_id = msg_id
+        self.src = src
+        self.dst = dst
+        self.type = type_
+        self.time = time
+        self.refer_msg_id = refer_msg_id
+        # if similar fields to line_no should be added consider a separate class
+        self.line_no = line_no
+
+    def __str__(self):
+        """
+        :return: a one-line summary containing ID, time, source, destination, type and referenced message ID
+        """
+        return "{0}. at {1}: {2}-->{3}, {4}, refer:{5}".format(
+            self.msg_id, self.time, self.src, self.dst, self.type, self.refer_msg_id)
+
+
+from ID2TLib import FileUtils, Generator
+from ID2TLib.IPv4 import IPAddress
+from ID2TLib.PcapAddressOperations import PcapAddressOperations
+from ID2TLib.CommunicationProcessor import CommunicationProcessor
+from ID2TLib.Botnet.MessageMapping import MessageMapping
+from ID2TLib.PcapFile import PcapFile
+from Core.Statistics import Statistics
+
+
+class MembersMgmtCommAttack(BaseAttack.BaseAttack):
+    def __init__(self):
+        """
+        Creates a new instance of the Membership Management Communication attack: registers its name and
+        description with the base class, declares the supported parameters and builds a lookup table
+        from message type values to MessageType members.
+        """
+        # Initialize communication
+        super().__init__("Membership Management Communication Attack (MembersMgmtCommAttack)",
+                         "Injects Membership Management Communication", "Botnet communication")
+
+        # Allowed parameters, each mapped to the type it is validated against
+        self.supported_params = {
+            # parameters regarding attack
+            Param.INJECT_AT_TIMESTAMP: ParameterTypes.TYPE_FLOAT,
+            Param.INJECT_AFTER_PACKET: ParameterTypes.TYPE_PACKET_POSITION,
+            Param.PACKETS_PER_SECOND: ParameterTypes.TYPE_FLOAT,
+            Param.PACKETS_LIMIT: ParameterTypes.TYPE_INTEGER_POSITIVE,
+            Param.ATTACK_DURATION: ParameterTypes.TYPE_INTEGER_POSITIVE,
+
+            # use num_attackers to specify number of communicating devices?
+            Param.NUMBER_INITIATOR_BOTS: ParameterTypes.TYPE_INTEGER_POSITIVE,
+
+            # input file containing botnet communication
+            Param.FILE_CSV: ParameterTypes.TYPE_FILEPATH,
+            Param.FILE_XML: ParameterTypes.TYPE_FILEPATH,
+
+            # the percentage of IP reuse (if total and other is specified, percentages are multiplied)
+            Param.IP_REUSE_TOTAL: ParameterTypes.TYPE_PERCENTAGE,
+            Param.IP_REUSE_LOCAL: ParameterTypes.TYPE_PERCENTAGE,
+            Param.IP_REUSE_EXTERNAL: ParameterTypes.TYPE_PERCENTAGE,
+
+            # the user-selected padding to add to every packet
+            Param.PACKET_PADDING: ParameterTypes.TYPE_PADDING,
+
+            # presence of NAT at the gateway of the network
+            Param.NAT_PRESENT: ParameterTypes.TYPE_BOOLEAN,
+
+            # the base PCAP for the TTL distribution
+            Param.TTL_FROM_CAIDA: ParameterTypes.TYPE_BOOLEAN,
+            Param.BOTNET_DST_PORT_CALCULATION: ParameterTypes.TYPE_BOOLEAN
+        }
+
+        # value -> MessageType lookup table, so a type can be resolved from its numeric value
+        self.msg_types = {member.value: member for member in MessageType}
+
+    def init_params(self):
+        """
+        Initialize the parameters of this communication-attack with default values.
+        Values supplied by the user overwrite these defaults. The remaining parameters are implicitly
+        set in the provided data file. Note: the timestamps in the file have to be sorted in ascending order.
+
+        Takes no arguments; reads self.statistics for the packet count and the capture timestamps.
+        """
+        # set class constants
+        self.DEFAULT_XML_PATH = "resources/MembersMgmtComm_example.xml"
+        # probability for responder ID to be local if comm_type is mixed
+        # NOTE(review): not read anywhere in the code visible here - confirm whether it is still needed
+        self.PROB_RESPND_IS_LOCAL = 0
+
+        # PARAMETERS: initialize with default values
+        # (values are overwritten if user specifies them)
+        # default injection point: a random packet position within the first fifth of the capture
+        self.add_param_value(Param.INJECT_AFTER_PACKET, 1 + randint(0, self.statistics.get_packet_count() // 5))
+
+        self.add_param_value(Param.PACKETS_PER_SECOND, 0)
+        self.add_param_value(Param.FILE_XML, self.DEFAULT_XML_PATH)
+
+        # Alternatively new attack parameter?
+        # default attack duration: the capture duration, truncated to whole seconds
+        duration = int(float(self._get_capture_duration()))
+        self.add_param_value(Param.ATTACK_DURATION, duration)
+        self.add_param_value(Param.NUMBER_INITIATOR_BOTS, 1)
+        # NAT on by default
+        self.add_param_value(Param.NAT_PRESENT, True)
+
+        # TODO: change 1 to something better
+        self.add_param_value(Param.IP_REUSE_TOTAL, 1)
+        self.add_param_value(Param.IP_REUSE_LOCAL, 0.5)
+        self.add_param_value(Param.IP_REUSE_EXTERNAL, 0.5)
+
+        # add default additional padding
+        self.add_param_value(Param.PACKET_PADDING, 20)
+
+        # choose the input PCAP as default base for the TTL distribution
+        self.add_param_value(Param.TTL_FROM_CAIDA, False)
+        self.add_param_value(Param.BOTNET_DST_PORT_CALCULATION, True)
+
+
+    def generate_attack_pcap(self, context):
+        """
+        Injects the packets of this attack into a PCAP and stores it as a temporary file.
+
+        Packets are buffered and written in batches of BUFFER_SIZE. The first written batch fixes the
+        packet length that all later batches are forced to.
+
+        :param context: the context of the attack, containing e.g. files that are to be created
+        :return: a tuple of the number packets injected and the path to the temporary attack PCAP;
+                 (0, []) if there are no messages or if no packet was generated at all
+        """
+
+        # create the final messages that have to be sent, including all bot configurations
+        messages = self._create_messages(context)
+
+        if not messages:
+            return 0, []
+
+        # Setup (initial) parameters for packet creation loop
+        BUFFER_SIZE = 1000
+        pkt_gen = Generator.PacketGenerator()
+        padding = self.get_param_value(Param.PACKET_PADDING)
+        packets = deque(maxlen=BUFFER_SIZE)
+        total_pkts = 0
+        limit_packetcount = self.get_param_value(Param.PACKETS_LIMIT)
+        limit_duration = self.get_param_value(Param.ATTACK_DURATION)
+        path_attack_pcap = None
+        max_len = None      # packet length fixed by the first written batch; None until a batch was written
+        last_packet = None  # last packet written to the PCAP; None as long as nothing was written
+        first_msg_time = messages[0].time
+
+        msg_packet_mapping = MessageMapping(messages)
+
+        def flush(buffered):
+            """
+            Equalizes the length of the buffered packets and appends them to the attack PCAP.
+            """
+            nonlocal path_attack_pcap, max_len, last_packet
+            batch = list(buffered)
+            if max_len is None:
+                # first batch: let equal_length choose the length and remember it for all later batches
+                Generator.equal_length(batch, padding = padding)
+                max_len = len(batch[-1])
+            else:
+                # later batches may contain different packet lengths, so force the remembered length
+                Generator.equal_length(batch, length = max_len, padding = padding, force_len = True)
+            last_packet = batch[-1]
+            path_attack_pcap = self.write_attack_pcap(batch, True, path_attack_pcap)
+
+        # create packets to write to PCAP file
+        for msg in messages:
+            # retrieve the source and destination configurations
+            ip_src, ip_dst = msg.src["IP"], msg.dst["IP"]
+            mac_src, mac_dst = msg.src["MAC"], msg.dst["MAC"]
+            if msg.type.is_request():
+                port_src, port_dst = int(msg.src["SrcPort"]), int(msg.dst["DstPort"])
+            else:
+                port_src, port_dst = int(msg.src["DstPort"]), int(msg.dst["SrcPort"])
+            ttl = int(msg.src["TTL"])
+
+            # update duration
+            duration = msg.time - first_msg_time
+
+            # if total number of packets has been sent or the attack duration has been exceeded, stop
+            if ((limit_packetcount is not None and total_pkts >= limit_packetcount) or
+                    (limit_duration is not None and duration >= limit_duration)):
+                break
+
+            # if the type of the message is a NL reply, determine the number of entries
+            nl_size = 0
+            if msg.type == MessageType.SALITY_NL_REPLY:
+                nl_size = randint(1, 25)    # what is max NL entries?
+
+            # create suitable IP/UDP packet and add to packets list
+            packet = pkt_gen.generate_mmcom_packet(ip_src=ip_src, ip_dst=ip_dst, ttl=ttl, mac_src=mac_src, mac_dst=mac_dst,
+                port_src=port_src, port_dst=port_dst, message_type=msg.type, neighborlist_entries=nl_size)
+            Generator.add_padding(packet, padding,True, True)
+
+            packet.time = msg.time
+            packets.append(packet)
+            msg_packet_mapping.map_message(msg, packet)
+            total_pkts += 1
+
+            # Store timestamp of first packet (for attack label)
+            if total_pkts <= 1:
+                self.attack_start_utime = packet.time
+            elif total_pkts % BUFFER_SIZE == 0:  # every 1000 packets write them to the PCAP file (append)
+                flush(packets)
+                packets = deque(maxlen=BUFFER_SIZE)
+
+        # if there are unwritten packets remaining, write them to the PCAP file
+        if len(packets) > 0:
+            flush(packets)
+
+        # write the mapping to a file
+        msg_packet_mapping.write_to(context.allocate_file("_mapping.xml"))
+
+        # the limits stopped the loop before a single packet was generated: nothing was injected
+        if last_packet is None:
+            return 0, []
+
+        # Store timestamp of last packet
+        self.attack_end_utime = last_packet.time
+
+        # Return total number of packets (sent) and the path to the attack PCAP
+        return total_pkts , path_attack_pcap
+
+
+    def generate_attack_packets(self):
+        """
+        No-op: performs no work and returns None.
+        """
+        return None
+
+    def _create_messages(self, context):
+        """
+        Creates the messages that are to be injected into the PCAP.
+        :param context: the context of the attack, containing e.g. files that are to be created
+        :return: the final messages as a list
+        """
+
+        def add_ids_to_config(ids_to_add: list, existing_ips: list, new_ips: list, bot_configs: dict, idtype:str="local", router_mac:str=""):
+            """
+            Builds a {"Type", "IP", "MAC"} configuration for every given IP, assigns each one to a randomly
+            chosen, not yet used ID and stores it in bot_configs. ids_to_add itself is left untouched.
+
+            :param ids_to_add: all sorted IDs that have to be configured and added
+            :param existing_ips: the existing IPs in the PCAP file that should be assigned to some, or all, IDs
+            :param new_ips: the newly generated IPs that should be assigned to some, or all, IDs
+            :param bot_configs: the existing configurations for the bots (updated in place)
+            :param idtype: the locality type of the IDs
+            :param router_mac: the MAC address of the router in the PCAP
+            """
+
+            unassigned_ids = ids_to_add.copy()
+            # the generator is only needed for new local IPs; one instance per call suffices for the current
+            # callers to not end up with the same MAC paired with different IPs
+            mac_generator = Generator.MacAddressGenerator()
+
+            # IPs already present in the PCAP keep the MAC address the statistics database reports for them
+            for ip in existing_ips:
+                picked_id = choice(unassigned_ids)
+                mac = self.statistics.process_db_query("macAddress(IPAddress=%s)" % ip)
+                bot_configs[picked_id] = {"Type": idtype, "IP": ip, "MAC": mac}
+                unassigned_ids.remove(picked_id)
+
+            # new IPs get a fresh random MAC if local, or the router MAC if external
+            for ip in new_ips:
+                picked_id = choice(unassigned_ids)
+                if idtype == "local":
+                    mac = mac_generator.random_mac()
+                elif idtype == "external":
+                    mac = router_mac
+                bot_configs[picked_id] = {"Type": idtype, "IP": ip, "MAC": mac}
+                unassigned_ids.remove(picked_id)
+
+        def index_increment(number: int, max: int):
+            """
+            Number increment with rollover: returns number + 1, or 0 once number + 1 reaches max.
+            """
+            successor = number + 1
+            return successor if successor < max else 0
+
+        def assign_realistic_ttls(bot_configs:list):
+            """
+            Assigns a TTL to every bot in bot_configs, processing the bots in sorted key order.
+            Local bots get the fixed TTL 128. Any other bot gets a TTL drawn from the TTL distribution the
+            statistics report for its IP, or the most used TTL of the PCAP if there is no such distribution.
+
+            :param bot_configs: the bot configurations to update in place (annotated as list, but used as a
+                                dict that maps bot IDs to configuration dicts)
+            """
+            for bot_id in sorted(bot_configs.keys()):
+                config = bot_configs[bot_id]
+
+                # local bots: fixed TTL
+                if config["Type"] == "local":
+                    config["TTL"] = 128
+                    continue
+
+                # external bots: TTL varies according to the distribution known for the bot's IP
+                ttl_distribution = self.statistics.get_ttl_distribution(config["IP"])
+                if len(ttl_distribution) > 0:
+                    config["TTL"] = Lea.fromValFreqsDict(ttl_distribution).random()
+                else:
+                    config["TTL"] = self.statistics.process_db_query("most_used(ttlValue)")
+
+        def assign_realistic_timestamps(messages: list, external_ids: set, local_ids: set, avg_delay_local:float, avg_delay_external: float, zero_reference:float):
+            """
+            Assigns realistic timestamps to a list of messages. The messages are modified in place; nothing is returned.
+
+            Every message that was not already handled as a response is shifted by zero_reference and jittered
+            by up to 50ms. If that would place it before the last response recorded for the same (src, dst) pair,
+            it is scheduled shortly after that response instead. The message referenced by refer_msg_id
+            (if any) is then timed relative to the request using the matching average delay +/- 10%.
+
+            :param messages: the list of messages to be updated (indexed by refer_msg_id)
+            :param external_ids: the set of bot ids, that are outside the network, i.e. external
+            :param local_ids: the set of bot ids, that are inside the network, i.e. local (not read in this function)
+            :param avg_delay_local: the avg_delay between the dispatch and the reception of a packet between local computers
+            :param avg_delay_external: the avg_delay between the dispatch and the reception of a packet between a local and an external computer
+            :param zero_reference: the timestamp which is regarded as the beginning of the pcap_file and therefore handled like a timestamp that resembles 0
+            """
+            # messages whose timestamp has already been set; responses are added here when their request is
+            # processed so that they are skipped when the loop reaches them
+            updated_msgs = []
+            last_response = {}      # maps a tuple of 2 bot IDs (requester, responder) to the time of the last response the requester received;
+                                    # necessary in order to make sure, that additional requests are sent only after the response to the last one was received
+            for msg in messages:    # init: -1 marks "no response recorded yet"
+                last_response[(msg.src, msg.dst)] = -1
+
+            # update all timestamps
+            for req_msg in messages:
+
+                if(req_msg in updated_msgs):
+                    # message already updated
+                    continue
+
+                # if req_msg.timestamp would be before the timestamp of the response to the last request, req_msg needs to be sent later (else branch)
+                if last_response[(req_msg.src, req_msg.dst)] == -1 or last_response[(req_msg.src, req_msg.dst)] < (zero_reference + req_msg.time - 0.05):
+                    ## update req_msg timestamp with a variation of up to 50ms
+                    req_msg.time = zero_reference + req_msg.time + uniform(-0.05, 0.05)
+                    updated_msgs.append(req_msg)
+
+                else:
+                    # send 10ms to 110ms after the last response of this pair
+                    req_msg.time = last_response[(req_msg.src, req_msg.dst)] + 0.06 + uniform(-0.05, 0.05)
+
+                # update response if necessary
+                if req_msg.refer_msg_id != -1:
+                    # NOTE(review): assumes refer_msg_id is a valid index into messages - confirm against the producer of the list
+                    respns_msg = messages[req_msg.refer_msg_id]
+
+                    # check for local or external communication and update response timestamp with the respective avg delay
+                    if req_msg.src in external_ids or req_msg.dst in external_ids:
+                        #external communication
+                        respns_msg.time = req_msg.time + avg_delay_external + uniform(-0.1*avg_delay_external, 0.1*avg_delay_external)
+
+                    else:
+                        #local communication
+                        respns_msg.time = req_msg.time + avg_delay_local + uniform(-0.1*avg_delay_local, 0.1*avg_delay_local)
+
+                    updated_msgs.append(respns_msg)
+                    last_response[(req_msg.src, req_msg.dst)] = respns_msg.time
+
+        def assign_ttls_from_caida(bot_configs):
+            """
+            Assign realistic TTL values to bots with respect to their IP, based on the CAIDA dataset.
+            Local bots always get the TTL 128. For any other bot: if there exists an entry for its IP, the TTL is
+            drawn from the distribution over all TTLs used by this IP; otherwise it is drawn from the distribution
+            over all used TTLs and their respective frequency.
+
+            :param bot_configs: the existing bot configurations (updated in place)
+            """
+            # Mapping IP to ASN: http://www.team-cymru.org/IP-ASN-mapping.html
+            # Why not assign TTLs for unknown IPs like this?
+
+            def get_ip_ttl_distrib():
+                """
+                Parses the CSV file containing a mapping between IPs and their used TTLs.
+                :return: a dict with the IPs as keys and dicts (key=TTL, value=Frequency) as values
+                """
+                per_ip = {}
+                with open("resources/CaidaTTL_perIP.csv", "r") as csv_file:
+                    next(csv_file)  # skip CSV header line
+                    # every remaining line consists of: IP, TTL, Frequency
+                    for row in csv_file:
+                        ip_addr, ttl, freq = row.split(",")
+                        per_ip.setdefault(ip_addr, {})[ttl] = int(freq)
+
+                return per_ip
+
+            def get_total_ttl_distrib():
+                """
+                Parses the CSV file containing an overview of all used TTLs and their respective frequency.
+                :return: a dict with the TTLs as keys and their frequencies as values
+                """
+                with open("resources/CaidaTTL_total.csv", "r") as csv_file:
+                    next(csv_file)  # skip CSV header line
+                    # every remaining line consists of: TTL, Frequency, Fraction
+                    return {ttl: int(freq) for ttl, freq, _ in (row.split(",") for row in csv_file)}
+
+            # the TTL distribution for every IP that is available in "resources/CaidaTTL_perIP.csv"
+            ip_ttl_distrib = get_ip_ttl_distrib()
+            # probability dict for the total TTL distribution
+            total_ttl_prob_dict = Lea.fromValFreqsDict(get_total_ttl_distrib())
+
+            # loop over every bot id and assign a TTL to the respective bot
+            for bot_id in sorted(bot_configs):
+                config = bot_configs[bot_id]
+                bot_type = config["Type"]
+                bot_ip = config["IP"]
+
+                if bot_type == "local":
+                    ttl = 128
+                elif bot_ip in ip_ttl_distrib:
+                    # detailed information about the TTL distribution of this IP exists
+                    ttl = Lea.fromValFreqsDict(ip_ttl_distrib[bot_ip]).random()
+                else:
+                    # otherwise draw a random TTL based on the total TTL distribution
+                    ttl = total_ttl_prob_dict.random()
+
+                config["TTL"] = ttl
+
+
+        def move_xml_to_outdir(filepath_xml: str):
+            """
+            Renames the XML file at filepath_xml into the output directory reported by the context and
+            registers the new file with the context.
+            :param filepath_xml: the filepath to the XML file
+            :return: the new filepath to the XML file
+            """
+
+            out_dir = context.get_output_dir()
+            file_name = os.path.basename(filepath_xml)
+            # insert a separator only if the directory does not already end with one
+            separator = "" if out_dir.endswith("/") else "/"
+            target_path = out_dir + separator + file_name
+            os.rename(filepath_xml, target_path)
+            context.add_other_created_file(target_path)
+            return target_path
+
+        # parse input CSV or XML
+        filepath_xml = self.get_param_value(Param.FILE_XML)
+        filepath_csv = self.get_param_value(Param.FILE_CSV)
+
+        # prefer XML input over CSV input (in case both are given):
+        # the CSV is only converted if the XML path is still the default one
+        if filepath_csv and filepath_xml == self.DEFAULT_XML_PATH:
+            filepath_xml = FileUtils.parse_csv_to_xml(filepath_csv)
+            filepath_xml = move_xml_to_outdir(filepath_xml)
+
+
+        abstract_packets = FileUtils.parse_xml(filepath_xml)
+
+        # find a good communication mapping in the input file that matches the users parameters
+        duration = self.get_param_value(Param.ATTACK_DURATION)
+        number_init_bots = self.get_param_value(Param.NUMBER_INITIATOR_BOTS)
+        nat = self.get_param_value(Param.NAT_PRESENT)
+        comm_proc = CommunicationProcessor(abstract_packets, self.msg_types, nat)
+
+        comm_intervals = comm_proc.find_interval_most_comm(number_init_bots, duration)
+        if comm_intervals == []:
+            print("Error: There is no interval in the given CSV/XML that has enough communication initiating bots.")
+            return []
+        # pick one of the candidate intervals at random
+        comm_interval = comm_intervals[randrange(0, len(comm_intervals))]
+
+        # retrieve the mapping information
+        mapped_ids, packet_start_idx, packet_end_idx = comm_interval["IDs"], comm_interval["Start"], comm_interval["End"]
+        # print(mapped_ids)
+        # randomly drop IDs until at most number_init_bots remain
+        while len(mapped_ids) > number_init_bots:
+            rm_idx = randrange(0, len(mapped_ids))
+            del mapped_ids[rm_idx]
+
+        # assign the communication processor this mapping for further processing
+        comm_proc.set_mapping(abstract_packets[packet_start_idx:packet_end_idx+1], mapped_ids)
+        # print start and end time of mapped interval
+        # print(abstract_packets[packet_start_idx]["Time"])
+        # print(abstract_packets[packet_end_idx]["Time"])
+        # print(mapped_ids)
+
+        # determine number of reused local and external IPs
+        # NOTE: both reuse counts are recomputed per locality below before they are read
+        reuse_percent_total = self.get_param_value(Param.IP_REUSE_TOTAL)
+        reuse_percent_external = self.get_param_value(Param.IP_REUSE_EXTERNAL)
+        reuse_percent_local = self.get_param_value(Param.IP_REUSE_LOCAL)
+        reuse_count_external = int(reuse_percent_total * reuse_percent_external * len(mapped_ids))
+        reuse_count_local = int(reuse_percent_total * reuse_percent_local * len(mapped_ids))
+
+        # create locality, IP and MAC configurations for the IDs/Bots
+        ipgen = Generator.IPGenerator()
+        pcapops = PcapAddressOperations(self.statistics)
+        router_mac = pcapops.get_probable_router_mac()
+        bot_configs = {}
+        # determine the roles of the IDs in the mapping communication-{initiator, responder}
+        local_init_ids, external_init_ids, respnd_ids, messages = comm_proc.det_id_roles_and_msgs()
+        # use these roles to determine which IDs are to be local and which external
+        local_ids, external_ids = comm_proc.det_ext_and_local_ids()
+
+        # retrieve and assign the IPs and MACs for the bots with respect to the given parameters
+        # (IDs are always added to bot_configs in the same order under a given seed)
+        number_local_ids, number_external_ids = len(local_ids), len(external_ids)
+        # assign addresses for local IDs
+        if number_local_ids > 0:
+            reuse_count_local = int(reuse_percent_total * reuse_percent_local * number_local_ids)
+            existing_local_ips = sorted(pcapops.get_existing_local_ips(reuse_count_local))
+            new_local_ips = sorted(pcapops.get_new_local_ips(number_local_ids - len(existing_local_ips)))
+            add_ids_to_config(sorted(local_ids), existing_local_ips, new_local_ips, bot_configs)
+
+        # assign addresses for external IDs
+        if number_external_ids > 0:
+            reuse_count_external = int(reuse_percent_total * reuse_percent_external * number_external_ids)
+            existing_external_ips = sorted(pcapops.get_existing_external_ips(reuse_count_external))
+            remaining = len(external_ids) - len(existing_external_ips)
+
+            # blacklist the reused IPs before generating the remaining external IPs
+            for external_ip in existing_external_ips: ipgen.add_to_blacklist(external_ip)
+            new_external_ips = sorted([ipgen.random_ip() for _ in range(remaining)])
+            add_ids_to_config(sorted(external_ids), existing_external_ips, new_external_ips, bot_configs, idtype="external", router_mac=router_mac)
+
+        # this is the timestamp at which the first packet should be injected, the packets have to be shifted to the beginning of the
+        # pcap file (INJECT_AT_TIMESTAMP) and then the offset of the packets have to be compensated to start at the given point in time
+        # NOTE(review): messages[0] raises IndexError if the processor returned no messages - confirm this cannot happen
+        zero_reference = self.get_param_value(Param.INJECT_AT_TIMESTAMP) - messages[0].time
+
+        # calculate the average delay values for local and external responses
+        avg_delay_local, avg_delay_external = self.statistics.get_avg_delay_local_ext()
+
+        #set timestamps
+        assign_realistic_timestamps(messages, external_ids, local_ids, avg_delay_local, avg_delay_external, zero_reference)
+
+        portSelector = PortSelectors.LINUX
+        # create port configurations for the bots
+        calculate_dst_port = self.get_param_value(Param.BOTNET_DST_PORT_CALCULATION)
+        for bot in sorted(bot_configs):
+            bot_configs[bot]["SrcPort"] = portSelector.select_port_udp()
+            if calculate_dst_port:
+                bot_configs[bot]["DstPort"] = Generator.gen_random_server_port()
+            else:
+                bot_configs[bot]["DstPort"] = portSelector.select_port_udp()
+
+        # assign realistic TTL for every bot
+        if self.get_param_value(Param.TTL_FROM_CAIDA):
+            assign_ttls_from_caida(bot_configs)
+        else:
+            assign_realistic_ttls(bot_configs)
+
+        # put together the final messages including the full sender and receiver
+        # configurations (i.e. IP, MAC, port, ...) for easier later use
+        final_messages = []
+        messages = sorted(messages, key=lambda msg: msg.time)
+        new_id = 0
+
+        for msg in messages:
+            type_src, type_dst = bot_configs[msg.src]["Type"], bot_configs[msg.dst]["Type"]
+            id_src, id_dst = msg.src, msg.dst
+
+            # sort out messages that do not have a suitable locality setting
+            if type_src == "external" and type_dst == "external":
+                continue
+
+            # replace the plain IDs by the full configurations and renumber the kept messages consecutively
+            msg.src, msg.dst = bot_configs[id_src], bot_configs[id_dst]
+            msg.src["ID"], msg.dst["ID"] = id_src, id_dst
+            msg.msg_id = new_id
+            new_id += 1
+            ### Important here to update refers, if needed later?
+            final_messages.append(msg)
+
+        return final_messages
+
+
+    def _get_capture_duration(self):
+        """
+        Returns the duration of the input PCAP (since statistics duration seems to be incorrect)
+
+        :return: the duration in seconds as a string "<seconds>.<microseconds>", with the microseconds
+                 zero-padded to six digits so that the string parses back to the correct float
+        """
+        ts_date_format = "%Y-%m-%d %H:%M:%S.%f"
+        ts_first_date = datetime.strptime(self.statistics.get_pcap_timestamp_start(), ts_date_format)
+        ts_last_date = datetime.strptime(self.statistics.get_pcap_timestamp_end(), ts_date_format)
+        diff_date = ts_last_date - ts_first_date
+        # %06d keeps the leading zeros of the fraction: 42 microseconds must render as ".000042", not ".42"
+        duration = "%d.%06d" % (diff_date.total_seconds(), diff_date.microseconds)
+        return duration

+ 62 - 0
code/ID2TLib/Botnet/MessageMapping.py

@@ -0,0 +1,62 @@
+import os.path
+from xml.dom.minidom import *
+import datetime
+
+
+class MessageMapping:
+    """
+    Records which generated packet belongs to which abstract message and exports
+    this mapping as an XML document.
+    """
+
+    TAG_MAPPING_GROUP = "mappings"
+    TAG_MAPPING = "mapping"
+
+    ATTR_ID = "id"
+    ATTR_LINENO = "line_number"
+    ATTR_HAS_PACKET = "mapped"
+
+    ATTR_PACKET_TIME = "packet_time"
+
+    def __init__(self, messages):
+        """
+        :param messages: the messages that are written to the mapping; to_xml reads
+                         msg_id, line_no, src["ID"], dst["ID"], type.value and time of each of them
+        """
+        self.messages = messages
+        self.id_to_packet = {}
+
+    def map_message(self, message, packet):
+        """
+        Remember the packet that was generated for the given message.
+
+        :param message: the message, identified by its msg_id
+        :param packet: the packet created for this message
+        """
+        self.id_to_packet[message.msg_id] = packet
+
+    def to_xml(self):
+        """
+        Build the XML document containing one mapping element per message.
+
+        :return: the xml.dom.minidom Document
+        """
+        doc = Document()
+
+        mappings = doc.createElement(self.TAG_MAPPING_GROUP)
+        doc.appendChild(mappings)
+
+        for message in self.messages:
+            mapping = doc.createElement(self.TAG_MAPPING)
+            mapping.setAttribute(self.ATTR_ID, str(message.msg_id))
+            mapping.setAttribute(self.ATTR_LINENO, str(message.line_no))
+
+            mapping.setAttribute("Src", str(message.src["ID"]))
+            mapping.setAttribute("Dst", str(message.dst["ID"]))
+            mapping.setAttribute("Type", str(message.type.value))
+
+            dt = datetime.datetime.fromtimestamp(message.time)
+            mapping.setAttribute("Time", str(message.time))
+            # %f yields zero-padded microseconds; appending str(dt.microsecond) would turn
+            # 5 microseconds into ".5" instead of ".000005"
+            mapping.setAttribute("Time-Datetime", dt.strftime("%Y-%m-%d %H:%M:%S.%f"))
+            mapping.setAttribute("Time-Timeonly", dt.strftime("%H:%M:%S.%f"))
+
+            packet = self.id_to_packet.get(message.msg_id)
+            # use the same "is not None" check for both attributes so that a packet
+            # flagged as mapped always carries its packet time
+            has_packet = packet is not None
+            mapping.setAttribute(self.ATTR_HAS_PACKET, "true" if has_packet else "false")
+            if has_packet:
+                mapping.setAttribute(self.ATTR_PACKET_TIME, str(packet.time))
+
+            mappings.appendChild(mapping)
+
+        return doc
+
+    def write_to(self, buffer, close = True):
+        """
+        Write the pretty-printed XML mapping into the given buffer.
+
+        :param buffer: a writable text buffer
+        :param close: whether the buffer is closed afterwards (also if writing fails)
+        """
+        try:
+            buffer.write(self.to_xml().toprettyxml())
+        finally:
+            if close:
+                buffer.close()
+
+    def write_to_file(self, filename: str, *args, **kwargs):
+        """
+        Write the XML mapping into the file with the given name.
+
+        :param filename: the path of the file to be written
+        :param args: additional positional arguments passed to open() after the mode
+        :param kwargs: additional keyword arguments passed to open()
+        """
+        with open(filename, "w", *args, **kwargs) as buffer:
+            self.write_to(buffer, close=False)
+
+    def write_next_to_pcap_file(self, pcap_filename : str, mapping_ext = "_mapping.xml", *args, **kwargs):
+        """
+        Write the XML mapping into a file next to the given PCAP file.
+
+        :param pcap_filename: the path of the PCAP file; its extension is replaced by mapping_ext
+        :param mapping_ext: the suffix appended to the PCAP path without extension
+        :param args: additional positional arguments passed to write_to_file
+        :param kwargs: additional keyword arguments passed to write_to_file
+        """
+        pcap_base = os.path.splitext(pcap_filename)[0]
+
+        self.write_to_file(pcap_base + mapping_ext, *args, **kwargs)

+ 0 - 0
code/ID2TLib/Botnet/__init__.py


+ 235 - 0
code/ID2TLib/CommunicationProcessor.py

@@ -0,0 +1,235 @@
+from lea import Lea
+from Attack.MembersMgmtCommAttack import MessageType
+from Attack.MembersMgmtCommAttack import Message
+
+# needed because of machine imprecision, e.g. a time difference of 0.1s may be stored as slightly more than 0.1s
+EPS_TOLERANCE = 1e-13  # works for a difference of 0.1, no less
+
+def greater_than(a: float, b: float):
+    """
+    A greater-than operator designed to tolerate slight machine imprecision of up to EPS_TOLERANCE.
+    :return: True if a exceeds b by more than EPS_TOLERANCE, otherwise False
+    """
+    return a - b > EPS_TOLERANCE
+
+class CommunicationProcessor():
+    """
+    Class to process parsed input CSV/XML data and retrieve a mapping or other information.
+    """
+
+    def __init__(self, packets:list, mtypes:dict, nat:bool):
+        """
+        Set up a CommunicationProcessor for the given abstract packets.
+        :param packets: the list of abstract packets
+        :param mtypes: a dict mapping message type numbers (int) to MessageType enum members
+        :param nat: whether NAT is present in this network
+        """
+        self.packets, self.mtypes, self.nat = packets, mtypes, nat
+
+    def set_mapping(self, packets: list, mapped_ids: dict):
+        """
+        Store the chosen mapping in this communication processor.
+
+        :param packets: all packets contained in the mapped time frame
+        :param mapped_ids: the chosen IDs; they are stored as a set in self.local_init_ids
+        """
+        chosen_ids = set(mapped_ids)
+        self.packets = packets
+        self.local_init_ids = chosen_ids
+
+    def find_interval_most_comm(self, number_ids: int, max_int_time: float):
+        """
+        Finds the time interval(s) of at most max_int_time seconds with the most overall communication
+        (i.e. requests and responses) that contain at least number_ids communication initiators.
+
+        :param number_ids: The number of initiator IDs that have to exist in the interval(s)
+        :param max_int_time: The maximum time period of the interval
+        :return: A list of dicts, one per interval, with the keys "IDs" (the sorted initiator IDs),
+                 "Start" and "End" (first and last index of the interval with respect to self.packets).
+                 The list is empty if no interval contains enough initiators.
+        """
+        # local import: only this method needs the double-ended queue
+        from collections import deque
+
+        # setup initial variables
+        packets = self.packets
+        mtypes = self.mtypes
+        idx_low, idx_high = 0, 0  # the indices spanning the interval
+        comm_sum = 0  # the communication sum of the current interval
+        cur_highest_sum = 0  # the highest communication sum seen so far
+        cur_int_time = 0.0  # defined up front, so that an empty packet list does not raise
+        init_ids = deque()  # the initiator IDs seen in the current interval in order of appearance
+        possible_intervals = []  # all intervals that have cur_highest_sum of communication and contain enough IDs
+
+        # Iterate over all packets from start to finish and process the info of each packet.
+        # Similar to a Sliding Window approach.
+        while True:
+            all_consumed = idx_high >= len(packets)
+            if not all_consumed:
+                cur_int_time = float(packets[idx_high]["Time"]) - float(packets[idx_low]["Time"])
+
+            # if current interval time exceeds maximum time period (or no packet is left),
+            # process information of the current interval
+            if all_consumed or greater_than(cur_int_time, max_int_time):
+                interval_ids = set(init_ids)
+                # if the interval contains enough initiator IDs, add it to possible_intervals
+                if len(interval_ids) >= number_ids:
+                    interval = {"IDs": sorted(interval_ids), "Start": idx_low, "End": idx_high-1}
+                    # reset possible intervals if new maximum of communication is found
+                    if comm_sum > cur_highest_sum:
+                        possible_intervals = [interval]
+                        cur_highest_sum = comm_sum
+                    # append otherwise
+                    elif comm_sum == cur_highest_sum:
+                        possible_intervals.append(interval)
+
+                # stop if all packets have been processed
+                if all_consumed:
+                    break
+
+            # let idx_low "catch up" so that the current interval time fits into the maximum time period again
+            while greater_than(cur_int_time, max_int_time):
+                cur_packet = packets[idx_low]
+                # if message was no timeout, delete the first appearance of the initiator ID
+                # of this packet from the initiator list and update comm_sum
+                if mtypes[int(cur_packet["Type"])] != MessageType.TIMEOUT:
+                    comm_sum -= 1
+                    init_ids.popleft()
+
+                idx_low += 1
+                cur_int_time = float(packets[idx_high]["Time"]) - float(packets[idx_low]["Time"])
+
+            # consume the new packet at idx_high and process its information
+            cur_packet = packets[idx_high]
+            cur_mtype = mtypes[int(cur_packet["Type"])]
+            # if message is request, add src to initiator list
+            if MessageType.is_request(cur_mtype):
+                init_ids.append(cur_packet["Src"])
+                comm_sum += 1
+            # if message is response, add dst to initiator list
+            elif MessageType.is_response(cur_mtype):
+                init_ids.append(cur_packet["Dst"])
+                comm_sum += 1
+
+            idx_high += 1
+
+        return possible_intervals
+
+    def det_id_roles_and_msgs(self):
+        """
+        Determine the role of every mapped ID. The role can be initiator, responder or both.
+        On the side also connect corresponding messages together to quickly find out
+        which reply belongs to which request and vice versa.
+
+        Requires self.local_init_ids, which is set by set_mapping. The results are also stored in
+        self.respnd_ids, self.external_init_ids and self.messages.
+
+        :return: a 4-tuple as (local initiator IDs, external initiator IDs, responder IDs, messages)
+        """
+
+        mtypes = self.mtypes
+        # setup initial variables and their values
+        respnd_ids = set()
+        # msgs --> the filtered messages, msg_id --> an increasing ID to give every message an artificial primary key
+        # (msg_id always equals the index of the message in msgs, which is relied on below)
+        msgs, msg_id = [], 0
+        # keep track of previous requests ("<src>-<dst>" --> msg_id) to find connections
+        prev_reqs = {}
+        # used to determine whether a request has been seen yet, so that replies before the first request are skipped and do not throw an error by
+        # accessing the empty dict prev_reqs (this is not a perfect solution, but it works most of the time)
+        req_seen = False
+        local_init_ids = self.local_init_ids
+        external_init_ids = set()
+
+        # process every packet individually
+        for packet in self.packets:
+            id_src, id_dst, msg_type, time = packet["Src"], packet["Dst"], int(packet["Type"]), float(packet["Time"])
+            lineno = packet.get("LineNumber", -1)
+            # if neither of the two IDs is mapped, skip the packet
+            if (id_src not in local_init_ids) and (id_dst not in local_init_ids):
+                continue
+
+            # convert message type number to enum type
+            msg_type = mtypes[msg_type]
+
+            # process a request
+            if msg_type in {MessageType.SALITY_HELLO, MessageType.SALITY_NL_REQUEST}:
+                if not self.nat and id_dst in local_init_ids and id_src not in local_init_ids:
+                    # without NAT an unmapped ID may initiate a communication with a mapped ID
+                    external_init_ids.add(id_src)
+                elif id_src not in local_init_ids:
+                    continue
+                else:
+                    # process ID's role
+                    respnd_ids.add(id_dst)
+                # convert the abstract message into a message object to handle it better
+                msg_str = "{0}-{1}".format(id_src, id_dst)
+                msg = Message(msg_id, id_src, id_dst, msg_type, time, line_no = lineno)
+                msgs.append(msg)
+                prev_reqs[msg_str] = msg_id
+                msg_id += 1
+                req_seen = True
+
+            # process a reply
+            elif msg_type in {MessageType.SALITY_HELLO_REPLY, MessageType.SALITY_NL_REPLY} and req_seen:
+                if not self.nat and id_src in local_init_ids and id_dst not in local_init_ids:
+                    # process ID's role
+                    external_init_ids.add(id_dst)
+                elif id_dst not in local_init_ids:
+                    continue
+                else: 
+                    # process ID's role
+                    respnd_ids.add(id_src)
+                # the key of the request belonging to this reply (the request went from id_dst to id_src)
+                msg_str = "{0}-{1}".format(id_dst, id_src)
+                # find the request message ID for this response and set its reference index
+                # NOTE(review): raises a KeyError if no request is stored for this reply
+                refer_idx = prev_reqs[msg_str]
+                msgs[refer_idx].refer_msg_id = msg_id
+                msg = Message(msg_id, id_src, id_dst, msg_type, time, refer_idx, lineno)
+                msgs.append(msg)
+                # remove the request to this response from storage
+                del(prev_reqs[msg_str])
+                msg_id += 1
+
+            # without NAT a timeout sent by a mapped ID is turned into the reply matching the stored request
+            elif msg_type == MessageType.TIMEOUT and id_src in local_init_ids and not self.nat:
+                # the key of the request this timeout answers (the request went from id_dst to id_src)
+                msg_str = "{0}-{1}".format(id_dst, id_src)
+                # find the request message ID for this response and set its reference index
+                refer_idx = prev_reqs.get(msg_str)
+                # timeouts without a stored request are dropped
+                if refer_idx is not None:
+                    msgs[refer_idx].refer_msg_id = msg_id
+                    if msgs[refer_idx].type == MessageType.SALITY_NL_REQUEST:
+                        msg = Message(msg_id, id_src, id_dst, MessageType.SALITY_NL_REPLY, time, refer_idx, lineno)
+                    else:
+                        msg = Message(msg_id, id_src, id_dst, MessageType.SALITY_HELLO_REPLY, time, refer_idx, lineno)
+                    msgs.append(msg)
+                    # remove the request to this response from storage
+                    del(prev_reqs[msg_str])
+                    msg_id += 1
+
+        # store the retrieved information in this object for later use
+        self.respnd_ids = sorted(respnd_ids)
+        self.external_init_ids = sorted(external_init_ids)
+        self.messages = msgs
+
+        # return the retrieved information
+        return self.local_init_ids, self.external_init_ids, self.respnd_ids, self.messages
+
+    def det_ext_and_local_ids(self, prob_rspnd_local: int=0):
+        """
+        Assign a locality (i.e. local or external) to the known IDs considering the given probability.
+        Local initiators stay local, external initiators stay external and every remaining
+        responder is assigned randomly.
+
+        :param prob_rspnd_local: the probability that a responder is local
+        :return: a tuple as (local IDs, external IDs), also stored in self.local_ids and self.external_ids
+        """
+        local_ids = self.local_init_ids.copy()
+        external_ids = set(self.external_init_ids)
+
+        # set up probabilistic chooser
+        rspnd_locality = Lea.fromValFreqsDict({"local": prob_rspnd_local*100, "external": (1-prob_rspnd_local)*100})
+
+        # determine the locality of all responders that are not assigned yet
+        for responder in self.respnd_ids:
+            already_assigned = responder in local_ids or responder in external_ids
+            if not already_assigned:
+                drawn = rspnd_locality.random()
+                if drawn == "local":
+                    local_ids.add(responder)
+                elif drawn == "external":
+                    external_ids.add(responder)
+
+        self.local_ids, self.external_ids = local_ids, external_ids
+        return self.local_ids, self.external_ids

+ 58 - 0
code/ID2TLib/FileUtils.py

@@ -0,0 +1,58 @@
+import xml.etree.ElementTree as ElementTree
+import csv
+import os
+
+def parse_xml(filepath: str):
+	'''
+	Reads an XML file and returns the attributes of all elements directly below the root.
+	It is assumed, that packets are placed on the second hierarchical level and packet information is encoded as attributes
+
+	:param filepath: the path to the XML file to be parsed
+	:return: a List of Dictionaries, each Dictionary contains the information of one packet
+	'''
+
+	document_root = ElementTree.parse(filepath).getroot()
+
+	# one attribute dictionary per child of the root node
+	return [element.attrib for element in document_root]
+
+def parse_csv_to_xml(filepath: str):
+	'''
+	Converts a CSV file into an XML file. Every entry is converted to a child with respective attributes of the root node.
+	Every field of an entry has to have the form "key:value"; spaces are removed and the value itself
+	may contain further colons (e.g. a clock time). The zero-based line number of the entry is stored
+	in the attribute "LineNumber".
+
+	:param filepath: the path to the CSV file to be parsed
+	:return: a path to the newly created XML file (the CSV path with the extension replaced by ".xml")
+	'''
+
+	filename = os.path.splitext(filepath)[0]
+	# build a tree structure
+	root = ElementTree.Element("trace")
+	root.attrib["path"] = filename
+
+	# parse the csvFile into reader
+	with open(filepath, "rt") as csvFile:
+		reader = csv.reader(csvFile, delimiter=",")
+		# loop through the parsed file, creating packet-elements with the structure of the csvFile as attributes
+		for lineno, line in enumerate(reader):  # lines start at zero, empty lines are counted as well
+			if not line:
+				continue
+
+			packet = ElementTree.SubElement(root, "packet")
+			for element in line:
+				# split at the first colon only, so that values containing colons stay intact
+				key, value = element.replace(" ", "").split(":", 1)
+				packet.attrib[key] = value
+			packet.attrib["LineNumber"] = str(lineno)
+
+	# writing the ElementTree into the .xml file
+	filepath = filename + ".xml"
+	ElementTree.ElementTree(root).write(filepath)
+	return filepath

+ 396 - 0
code/ID2TLib/Generator.py

@@ -0,0 +1,396 @@
+import numpy.random as random2
+import random
+import string
+
+from numpy.random import bytes
+from random import getrandbits
+from scapy.layers.inet import IP, Ether, UDP, TCP
+from scapy.packet import Raw
+from Attack.MembersMgmtCommAttack import MessageType
+from ID2TLib import IPv4 as ip
+
+
+
+
+'''PaddingGenerator
+'''
+
+def add_padding(packet, bytes_padding:int = 0, user_padding:bool=True, rnd:bool = False):
+    '''
+    Adds padding to a packet with the given amount of bytes, but a maximum of 100 bytes, if called by the user.
+    :param packet: the packet that will be extended with the additional payload; it needs a Raw layer
+    :param bytes_padding: the amount of bytes that will be appended to the packet. Capped to 100,
+    if called by the user.
+    :param user_padding: true, if the function add_padding is called by the user and not within the code
+    :param rnd: if true, a random padding between 0 and bytes_padding that is divisible by 4 is added
+    :return: the initial packet, extended with the wanted amount of bytes of padding
+    '''
+
+    if user_padding and bytes_padding > 100:
+        bytes_padding = 100
+
+    if rnd:
+        # floor division keeps the random padding within bytes_padding (rounding could exceed it);
+        # randint excludes the upper bound, hence the + 1
+        max_blocks = bytes_padding // 4
+        bytes_padding = int(random2.randint(0, max_blocks + 1)) * 4
+    payload = generate_payload(bytes_padding)
+    packet[Raw].load += Raw(load=payload).load
+    return packet
+
+def equal_length(list_of_packets:list, length:int = 0, padding:int = 0, force_len:bool = False):
+    '''
+    Pads the packets of the given list to a common length. Unless force_len is set, the common length is
+    the given length or the length of the largest packet, whichever is greater. Every packet that had to be
+    extended additionally receives a random padding to create realism.
+    :param list_of_packets: The given set of packets.
+    :param length: The length each packet should have. Can be redundant, if the largest packet has more bytes
+    than length.
+    :param padding: The maximum amount of random padding added to every extended packet.
+    :param force_len: if true, the given length is used even if a packet is larger; larger packets stay untouched
+    :return: The list of extended packets.
+    '''
+    target_length = length
+    if not force_len:
+        target_length = max([length] + [len(pkt) for pkt in list_of_packets])
+
+    for pkt in list_of_packets:
+        missing = target_length - len(pkt)
+        if missing <= 0:
+            continue
+        add_padding(pkt, missing, False, False)  # extend the packet to the common length
+        add_padding(pkt, padding, False, True)  # add random additional padding to create realism
+
+    return list_of_packets
+
+'''PayloadGenerator
+'''
+
+def generate_payload(size:int=0):
+	"""
+	Creates a payload consisting of the given amount of random bytes
+
+	:param size: number of generated bytes
+	:return: the generated payload
+	"""
+
+	# NOTE: bytes is numpy.random.bytes here (imported at file level), not the builtin
+	return bytes(size)
+
+
+'''PortGenerator
+'''
+
+def gen_random_server_port(offset: int=2199):
+    """
+    Draws a random valid first and last character of a bot's hostname
+    and derives a port from these two characters.
+    The default offset is chosen from a Sality implementation in 2011
+
+    :param offset: the value added to the product of the two character codes
+    :return: the computed port
+    """
+    first_char = random.choice(string.ascii_letters)
+    last_char = random.choice(string.ascii_letters + string.digits)
+    return offset + ord(first_char) * ord(last_char)
+
+
+'''MacAddressGenerator
+'''
+
+
+class MacAddressGenerator:
+    """
+    Generates random MAC addresses without ever returning the same address twice
+    (until clear is called).
+    """
+
+    def __init__(self, include_broadcast_macs=False, include_virtual_macs=False):
+        """
+        :param include_broadcast_macs: if False, the lowest bit of the first byte is always cleared
+        :param include_virtual_macs: if False, the second lowest bit of the first byte is always cleared
+        """
+        self.generated = set()
+        self.broadcast = include_broadcast_macs
+        self.virtual = include_virtual_macs
+
+    def random_mac(self) -> str:
+        """
+        :return: a random MAC address that has not been returned since the last call of clear
+        """
+        candidate = self._random_mac()
+        while candidate in self.generated:
+            candidate = self._random_mac()
+        self.generated.add(candidate)
+        return candidate
+
+    def clear(self):
+        """
+        Forget all addresses generated so far.
+        """
+        self.generated.clear()
+
+    def generates_broadcast_macs(self) -> bool:
+        return self.broadcast
+
+    def generates_virtual_macs(self) -> bool:
+        return self.virtual
+
+    def set_broadcast_generation(self, broadcast: bool):
+        self.broadcast = broadcast
+
+    def set_virtual_generation(self, virtual: bool):
+        self.virtual = virtual
+
+    def _random_mac(self) -> str:
+        """
+        :return: a random MAC address as six upper-case hex bytes joined by colons, not checked for uniqueness
+        """
+        octets = bytearray(getrandbits(8) for _ in range(6))
+        first_octet_mask = 0xFF
+        if not self.broadcast:
+            first_octet_mask &= ~1  # clear the first byte's first bit
+        if not self.virtual:
+            first_octet_mask &= ~2  # clear the first byte's second bit
+        octets[0] &= first_octet_mask
+
+        return ":".join(format(octet, "02X") for octet in octets)
+
+
+
+'''PacketGenerator
+'''
+
+
+class PacketGenerator():
+    """
+    Creates packets, based on the set protocol
+    """
+
+    def __init__(self, protocol="udp"):
+        """
+        Creates a new Packet_Generator Object
+
+        :param protocol: the protocol of the packets to be created, udp or tcp
+        """
+        super(PacketGenerator, self).__init__()
+        self.protocol = protocol
+
+    def generate_packet(self, ip_src: str = "192.168.64.32", ip_dst: str = "192.168.64.48",
+                        mac_src: str = "56:6D:D9:BC:70:1C",
+                        mac_dst: str = "F4:2B:95:B3:0E:1A", port_src: int = 1337, port_dst: int = 6442, ttl: int = 64,
+                        tcpflags: str = "S", payload: str = ""):
+        """
+        Creates a Packet with the specified Values for the current protocol
+
+        :param ip_src: the source IP address of the IP header
+        :param ip_dst: the destination IP address of the IP header
+        :param mac_src: the source MAC address of the MAC header
+        :param mac_dst: the destination MAC address of the MAC header
+        :param port_src: the source port of the header
+        :param port_dst: the destination port of the header
+        :param ttl: the ttl Value of the packet
+        :param tcpflags: the TCP flags of the TCP header, only used if tcp is selected as protocol
+        :param payload: the payload of the packet
+        :return: the corresponding packet
+        :raises ValueError: if the protocol of this generator is neither udp nor tcp
+        """
+
+        return self._build_packet(tcpflags, payload, ip_src=ip_src, ip_dst=ip_dst, mac_src=mac_src,
+                                  mac_dst=mac_dst, ttl=ttl, port_src=port_src, port_dst=port_dst)
+
+    def generate_mmcom_packet(self, ip_src: str = "192.168.64.32", ip_dst: str = "192.168.64.48",
+                              mac_src: str = "56:6D:D9:BC:70:1C",
+                              mac_dst: str = "F4:2B:95:B3:0E:1A", port_src: int = 1337, port_dst: int = 6442,
+                              tcpflags: str = "S", ttl: int = 64,
+                              message_type: MessageType = MessageType.SALITY_HELLO, neighborlist_entries: int = 1):
+        """
+        Creates a Packet for Members-Management-Communication with the specified Values and the current protocol
+
+        :param ip_src: the source IP address of the IP header
+        :param ip_dst: the destination IP address of the IP header
+        :param mac_src: the source MAC address of the MAC header
+        :param mac_dst: the destination MAC address of the MAC header
+        :param port_src: the source port of the header
+        :param port_dst: the destination port of the header
+        :param tcpflags: the TCP flags of the TCP header, if tcp is selected as protocol
+        :param ttl: the ttl Value of the packet
+        :param message_type: affects the size of the payload
+        :param neighborlist_entries: number of entries of a Neighbourlist-reply, affects the size of the payload
+        :return: the corresponding packet
+        :raises ValueError: if the protocol of this generator is neither udp nor tcp
+        """
+
+        # Determine length of the payload that has to be generated
+        if message_type == MessageType.SALITY_HELLO:
+            payload_len = 0
+        elif message_type == MessageType.SALITY_HELLO_REPLY:
+            payload_len = 22
+        elif message_type == MessageType.SALITY_NL_REQUEST:
+            payload_len = 28
+        elif message_type == MessageType.SALITY_NL_REPLY:
+            payload_len = 24 + 6 * neighborlist_entries
+        else:
+            payload_len = 0
+
+        payload = generate_payload(payload_len)
+
+        return self._build_packet(tcpflags, payload, ip_src=ip_src, ip_dst=ip_dst, mac_src=mac_src,
+                                  mac_dst=mac_dst, ttl=ttl, port_src=port_src, port_dst=port_dst)
+
+    def _build_packet(self, tcpflags, payload, **header_fields):
+        """
+        Builds the packet with the module-level builder that matches self.protocol.
+
+        :param tcpflags: the TCP flags, ignored for udp
+        :param payload: the payload of the packet
+        :param header_fields: the address, port and ttl keyword arguments passed on to the builder
+        :return: the corresponding packet
+        :raises ValueError: if self.protocol is neither udp nor tcp
+        """
+        if self.protocol == "udp":
+            return generate_udp_packet(payload=payload, **header_fields)
+        if self.protocol == "tcp":
+            return generate_tcp_packet(tcpflags=tcpflags, payload=payload, **header_fields)
+        # fail with a clear message instead of returning an unbound local variable
+        raise ValueError("Error: unsupported protocol for generating Packets: {0!r}".format(self.protocol))
+
+
+def generate_tcp_packet(ip_src: str = "192.168.64.32", ip_dst: str = "192.168.64.48",
+                        mac_src: str = "56:6D:D9:BC:70:1C", ttl: int = 64,
+                        mac_dst: str = "F4:2B:95:B3:0E:1A", port_src: int = 1337, port_dst: int = 6442,
+                        tcpflags: str = "S", payload: str = ""):
+    """
+    Assembles a TCP packet (Ethernet / IP / TCP / Raw) from the given values.
+
+    :param ip_src: the source IP address of the IP header
+    :param ip_dst: the destination IP address of the IP header
+    :param mac_src: the source MAC address of the MAC header
+    :param ttl: the ttl value of the packet
+    :param mac_dst: the destination MAC address of the MAC header
+    :param port_src: the source port of the TCP header
+    :param port_dst: the destination port of the TCP header
+    :param tcpflags: the TCP flags of the TCP header
+    :param payload: the payload of the packet
+    :return: the corresponding TCP packet
+    """
+
+    link_layer = Ether(src=mac_src, dst=mac_dst)
+    network_layer = IP(src=ip_src, dst=ip_dst, ttl=ttl)
+    transport_layer = TCP(sport=port_src, dport=port_dst, flags=tcpflags)
+    return link_layer / network_layer / transport_layer / Raw(load=payload)
+
+
+def generate_udp_packet(ip_src: str = "192.168.64.32", ip_dst: str = "192.168.64.48",
+                        mac_src: str = "56:6D:D9:BC:70:1C", ttl: int = 64,
+                        mac_dst: str = "F4:2B:95:B3:0E:1A", port_src: int = 1337, port_dst: int = 6442,
+                        payload: str = ""):
+    """
+    Assembles an UDP packet (Ethernet / IP / UDP / Raw) from the given values.
+
+    :param ip_src: the source IP address of the IP header
+    :param ip_dst: the destination IP address of the IP header
+    :param mac_src: the source MAC address of the MAC header
+    :param ttl: the ttl value of the packet
+    :param mac_dst: the destination MAC address of the MAC header
+    :param port_src: the source port of the UDP header
+    :param port_dst: the destination port of the UDP header
+    :param payload: the payload of the packet
+    :return: the corresponding UDP packet
+    """
+
+    link_layer = Ether(src=mac_src, dst=mac_dst)
+    network_layer = IP(src=ip_src, dst=ip_dst, ttl=ttl)
+    transport_layer = UDP(sport=port_src, dport=port_dst)
+    return link_layer / network_layer / transport_layer / Raw(load=payload)
+
+'''IPGenerator
+'''
+
+
+class IPChooser:
+    """
+    Chooses random IP addresses from the whole IPv4 address space.
+    """
+
+    def random_ip(self):
+        """
+        :return: a random address out of all 2**32 IPv4 addresses
+        """
+        numeric = random.randrange(0, 1 << 32)
+        return ip.IPAddress.from_int(numeric)
+
+    def size(self):
+        """
+        :return: the number of addresses this chooser can select from
+        """
+        return 1 << 32
+
+    def __len__(self):
+        return self.size()
+
+
+class IPChooserByRange(IPChooser):
+    """
+    Chooses random IP addresses from a given address block.
+    """
+
+    def __init__(self, ip_range):
+        """
+        :param ip_range: the address block; it has to provide first_address() and block_size()
+        """
+        self.range = ip_range
+
+    def random_ip(self):
+        """
+        :return: a random address within the block
+        """
+        first = int(self.range.first_address())
+        numeric = random.randrange(first, first + self.range.block_size())
+        return ip.IPAddress.from_int(numeric)
+
+    def size(self):
+        """
+        :return: the number of addresses within the block
+        """
+        return self.range.block_size()
+
+
+class IPChooserByList(IPChooser):
+    """
+    Chooses random IP addresses from a fixed collection of addresses.
+    """
+
+    def __init__(self, ips):
+        """
+        :param ips: an iterable of the selectable addresses, must not be empty
+        :raises ValueError: if no address is given
+        """
+        self.ips = list(ips)
+        if len(self.ips) == 0:
+            raise ValueError("list of ips must not be empty")
+
+    def random_ip(self):
+        """
+        :return: a random element of the given addresses
+        """
+        return random.choice(self.ips)
+
+    def size(self):
+        """
+        :return: the number of given addresses
+        """
+        return len(self.ips)
+
+
+class IPGenerator:
+    """
+    Generates unique random IP addresses that are not contained in any blacklisted address block.
+    """
+
+    def __init__(self, ip_chooser=IPChooser(),  # include all ip-addresses by default (before the blacklist)
+                 include_private_ips=False, include_localhost=False,
+                 include_multicast=False, include_reserved=False,
+                 include_link_local=False, blacklist=None):
+        """
+        :param ip_chooser: the chooser that proposes the random addresses
+        :param include_private_ips: if False, the private segments are blacklisted
+        :param include_localhost: if False, the localhost segment is blacklisted
+        :param include_multicast: if False, the multicast segment is blacklisted
+        :param include_reserved: if False, the reserved segment is blacklisted
+        :param include_link_local: if False, the zero-conf segment is blacklisted
+        :param blacklist: additional segments (blocks or their string representation) to be blacklisted
+        """
+        self.blacklist = []
+        self.generated_ips = set()
+
+        if not include_private_ips:
+            for private_segment in ip.ReservedIPBlocks.PRIVATE_IP_SEGMENTS:
+                self.add_to_blacklist(private_segment)
+        if not include_localhost:
+            self.add_to_blacklist(ip.ReservedIPBlocks.LOCALHOST_SEGMENT)
+        if not include_multicast:
+            self.add_to_blacklist(ip.ReservedIPBlocks.MULTICAST_SEGMENT)
+        if not include_reserved:
+            self.add_to_blacklist(ip.ReservedIPBlocks.RESERVED_SEGMENT)
+        if not include_link_local:
+            self.add_to_blacklist(ip.ReservedIPBlocks.ZERO_CONF_SEGMENT)
+        if blacklist:
+            for custom_segment in blacklist:
+                self.add_to_blacklist(custom_segment)
+        self.chooser = ip_chooser
+
+    @staticmethod
+    def from_range(range, *args, **kwargs):
+        """
+        Create a generator that only proposes addresses of the given block.
+
+        :param range: the address block the addresses are chosen from
+        :param args: positional arguments passed to the constructor after the chooser
+        :param kwargs: keyword arguments passed to the constructor
+        :return: the new IPGenerator
+        """
+        return IPGenerator(IPChooserByRange(range), *args, **kwargs)
+
+    def add_to_blacklist(self, ip_segment):
+        """
+        Blacklist the given segment.
+
+        :param ip_segment: an IPAddressBlock or a value that IPAddressBlock.parse accepts
+        """
+        if not isinstance(ip_segment, ip.IPAddressBlock):
+            ip_segment = ip.IPAddressBlock.parse(ip_segment)
+        self.blacklist.append(ip_segment)
+
+    def random_ip(self):
+        """
+        :return: the string representation of a random address that is neither blacklisted nor generated before
+        :raises ValueError: if as many addresses were generated as the chooser offers
+        """
+        if len(self.generated_ips) == self.chooser.size():
+            raise ValueError("Exhausted the space of possible ip-addresses, no new unique ip-address can be generated")
+
+        # draw until an address is found that is neither blacklisted nor used yet
+        while True:
+            candidate = self.chooser.random_ip()
+            if self._is_in_blacklist(candidate) or candidate in self.generated_ips:
+                continue
+
+            self.generated_ips.add(candidate)
+            return str(candidate)
+
+    def clear(self, clear_blacklist=True, clear_generated_ips=True):
+        """
+        Reset the state of this generator.
+
+        :param clear_blacklist: whether the blacklist is emptied
+        :param clear_generated_ips: whether the generated addresses are forgotten
+        """
+        if clear_blacklist:
+            self.blacklist.clear()
+        if clear_generated_ips:
+            self.generated_ips.clear()
+
+    def _is_in_blacklist(self, ip: ip.IPAddress):
+        """
+        :return: True if any blacklisted block contains the given address, otherwise False
+        """
+        for block in self.blacklist:
+            if ip in block:
+                return True
+        return False
+
+
+class MappingIPGenerator(IPGenerator):
+    """
+    An IPGenerator that additionally remembers which address was generated for which key,
+    so that the same key always yields the same address.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        :param args: positional arguments passed to the constructor of IPGenerator
+        :param kwargs: keyword arguments passed to the constructor of IPGenerator
+        """
+        # super() already binds self; passing it again would make this generator its own ip_chooser
+        super().__init__(*args, **kwargs)
+
+        self.mapping = {}
+
+    def clear(self, clear_generated_ips=True, *args, **kwargs):
+        """
+        Reset the state of this generator.
+
+        :param clear_generated_ips: whether the generated addresses and the key mapping are forgotten
+        :param args: further positional arguments passed to IPGenerator.clear (i.e. clear_blacklist)
+        :param kwargs: further keyword arguments passed to IPGenerator.clear
+        """
+        # super() already binds self; passing it again would be taken as clear_blacklist
+        super().clear(*args, clear_generated_ips=clear_generated_ips, **kwargs)
+        if clear_generated_ips:
+            self.mapping = {}
+
+    def get_mapped_ip(self, key):
+        """
+        :param key: the key an address is requested for
+        :return: the address mapped to the key; a new random address is generated on first use of the key
+        """
+        if key not in self.mapping:
+            self.mapping[key] = self.random_ip()
+
+        return self.mapping[key]
+
+    def __getitem__(self, item):
+        return self.get_mapped_ip(item)

+ 269 - 0
code/ID2TLib/IPv4.py

@@ -0,0 +1,269 @@
+import re
+
+
+class IPAddress:
+	"""
+	A simple class encapsulating an ip-address. An IPAddress can be constructed by string, int and 4-element-list
+	(e.g. [8, 8, 8, 8]). This is a leightweight class as it only contains string-to-ip-and-reverse-conversion
+	and some convenience methods.
+	"""
+	
+	# a number between 0 and 255, no leading zeros
+	_IP_NUMBER_REGEXP = r"(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)"
+	# 4 numbers between 0 and 255, joined together with dots
+	IP_REGEXP = r"{0}\.{0}\.{0}\.{0}".format(_IP_NUMBER_REGEXP)
+	
+	def __init__(self, intlist: "list[int]") -> "IPAddress":
+		"""
+		Construct an ipv4-address with a list of 4 integers, e.g. to construct the ip 10.0.0.0 pass [10, 0, 0, 0]
+		"""
+		if not isinstance(intlist, list) or not all(isinstance(n, int) for n in intlist):
+			raise TypeError("The first constructor argument must be an list of ints")
+		if not len(intlist) == 4 or not all(0 <= n <= 255 for n in intlist):
+			raise ValueError("The integer list must contain 4 ints in range of 0 and 255, like an ip-address")
+		
+		# For easier calculations store the ip as integer, e.g. 10.0.0.0 is 0x0a000000
+		self.ipnum = int.from_bytes(bytes(intlist), "big")
+	
+	@staticmethod
+	def parse(ip: str) -> "IPAddress":
+		"""
+		Parse an ip-address-string. If the string does not comply to the ipv4-format a ValueError is raised
+		:param ip: A string-representation of an ip-address, e.g. "10.0.0.0"
+		:return: IPAddress-object describing the ip-address
+		"""
+		match = re.match("^" + IPAddress.IP_REGEXP + "$", ip)
+		if not match:
+			raise ValueError("%s is no ipv4-address" % ip)
+		
+		# the matches we get are the numbers of the ip-address (match 0 is the whole ip-address)
+		numbers = [int(match.group(i)) for i in range(1, 5)]
+		return IPAddress(numbers)
+	
+	@staticmethod
+	def from_int(numeric: int) -> "IPAddress":
+		if numeric not in range(1 << 32):
+			raise ValueError("numeric value must be in uint-range")
+		
+		# to_bytes is the easiest way to split a 32-bit int into bytes
+		return IPAddress(list(numeric.to_bytes(4, "big")))
+	
+	@staticmethod
+	def is_ipv4(ip: str) -> bool:
+		"""
+		Check if the supplied string is in ipv4-format
+		"""
+		
+		match = re.match("^" + IPAddress.IP_REGEXP + "$", ip)
+		return True if match else False
+
+	def to_int(self) -> int:
+		"""
+		Convert the ip-address to a 32-bit uint, e.g. IPAddress.parse("10.0.0.255").to_int() returns 0x0a0000ff
+		"""
+		return self.ipnum
+	
+	def is_private(self) -> bool:
+		"""
+		Returns a boolean indicating if the ip-address lies in the private ip-segments (see ReservedIPBlocks)
+		"""
+		return ReservedIPBlocks.is_private(self)
+	
+	def get_private_segment(self) -> bool:
+		"""
+		Return the private ip-segment the ip-address belongs to (there are several)
+		If this ip does not belong to a private ip-segment a ValueError is raised
+		:return: IPAddressBlock
+		"""
+		return ReservedIPBlocks.get_private_segment(self)
+
+	def is_localhost(self) -> bool:
+		"""
+		Returns a boolean indicating if the ip-address lies in the localhost-segment
+		"""
+		return ReservedIPBlocks.is_localhost(self)
+	
+	def is_multicast(self) -> bool:
+		"""
+		Returns a boolean indicating if the ip-address lies in the multicast-segment
+		"""
+		return ReservedIPBlocks.is_multicast(self)
+	
+	def is_reserved(self) -> bool:
+		"""
+		Returns a boolean indicating if the ip-address lies in the reserved-segment
+		"""
+		return ReservedIPBlocks.is_reserved(self)
+	
+	def is_zero_conf(self) -> bool:
+		"""
+		Returns a boolean indicating if the ip-address lies in the zeroconf-segment
+		"""
+		return ReservedIPBlocks.is_zero_conf(self)
+	
+	def _tuple(self) -> (int,int,int,int):
+		return tuple(self.ipnum.to_bytes(4, "big"))
+	
+	def __repr__(self) -> str:
+		"""
+		Following the python style guide, eval(repr(obj)) should equal obj
+		"""
+		return "IPAddress([%i, %i, %i, %i])" % self._tuple()
+	
+	def __str__(self) -> str:
+		"""
+		Return the ip-address described by this object in ipv4-format
+		"""
+		return "%i.%i.%i.%i" % self._tuple()
+	
+	def __hash__(self) -> int:
+		return self.ipnum
+	
+	def __eq__(self, other) -> bool:
+		if other is None:
+			return False
+		
+		return isinstance(other, IPAddress) and self.ipnum == other.ipnum
+	
+	def __lt__(self, other) -> bool:
+		if other is None:
+			raise TypeError("Cannot compare to None")
+		if not isinstance(other, IPAddress):
+			raise NotImplemented # maybe other can compare to self
+		
+		return self.ipnum < other.ipnum
+	
+	def __int__(self) -> bool:
+		return self.ipnum
+
+class IPAddressBlock:
+	"""
+	This class describes a block of IPv4-addresses, just as a string in CIDR-notation does.
+	It can be seen as a range of ip-addresses. To check if a block contains a ip-address
+	simply use "ip in ip_block"
+	"""
+	
+	# this regex describes CIDR-notation (an ip-address plus "/XX", whereas XX is a number between 1 and 32)
+	CIDR_REGEXP = IPAddress.IP_REGEXP + r"(\/(3[0-2]|[12]?\d)|)?"
+	
+	def __init__(self, ip: "Union(str, list, IPAddress)", netmask = 32) -> "IPAddressBlock":
+		"""
+		Construct a ip-block given a ip-address and a netmask. Given an ip and a netmask,
+		the constructed ip-block will describe the range ip/netmask (e.g. 127.0.0.1/8)
+		:param ip: An ip-address, represented as IPAddress, string or 4-element-list
+		"""
+		if isinstance(ip, str):
+			ip = IPAddress.parse(ip)
+		elif isinstance(ip, list):
+			ip = IPAddress(ip)
+		
+		if not 1 <= netmask <= 32:
+			raise ValueError("netmask must lie between 1 and 32")
+		
+		# clear the unnecessary bits in the base-ip, e.g. this will convert 10.0.0.255/24 to 10.0.0.0/24 which are equivalent
+		self.ipnum = ip.to_int() & self._bitmask(netmask)
+		self.netmask = netmask
+
+	@staticmethod
+	def parse(cidr: str) -> "IPAddressBlock":
+		"""
+		Parse a string in cidr-notation and return a IPAddressBlock describing the ip-segment
+		If the string is not in cidr-notation a ValueError is raised
+		"""
+		
+		match = re.match("^" + IPAddressBlock.CIDR_REGEXP + "$", cidr)
+		if not match:
+			raise ValueError("%s is no valid cidr-notation" % cidr)
+		
+		ip = [int(match.group(i)) for i in range(1, 5)]
+		suffix = 32 if not match.group(6) else int(match.group(6))
+		
+		return IPAddressBlock(ip, suffix)
+	
+	def block_size(self) -> int:
+		"""
+		Return the size of the ip-address-block. E.g. the size of someip/24 is 256
+		"""
+		return 2 ** (32 - self.netmask)
+	
+	def first_address(self) -> IPAddress:
+		"""
+		Return the first ip-address of the ip-block
+		"""
+		return IPAddress.from_int(self.ipnum)
+
+	def last_address(self) -> IPAddress:
+		"""
+		Return the last ip-address of the ip-block
+		"""
+		return IPAddress.from_int(self.ipnum + self.block_size() - 1)
+
+	def _bitmask(self, netmask: int) -> int:
+		ones = lambda x: (1 << x) - 1
+		
+		return ones(32) ^ ones(32 - netmask)
+	
+	def __repr__(self) -> str:
+		"""
+		Conforming to python style-guide, eval(repr(obj)) equals obj
+		"""
+		return "IPAddressBlock(%s, %i)" % (repr(IPAddress.from_int(self.ipnum)), self.netmask)
+	
+	def __str__(self) -> str:
+		"""
+		Return a string in cidr-notation
+		"""
+		return str(IPAddress.from_int(self.ipnum)) + "/" + str(self.netmask)
+	
+	def __contains__(self, ip: IPAddress) -> bool:
+		return (ip.to_int() & self._bitmask(self.netmask)) == self.ipnum
+
+class ReservedIPBlocks:
+	"""
+	To avoid magic values and save developers some research this class contains several constants
+	describing special network-segments and some is_-methods to check if an ip is in the specified segment.
+	"""
+	
+ 	# a list of ip-addresses that can be used in private networks
+	PRIVATE_IP_SEGMENTS = [
+		IPAddressBlock.parse(block)
+		for block in
+		("10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16")
+	]
+	
+	LOCALHOST_SEGMENT = IPAddressBlock.parse("127.0.0.0/8")
+	
+	MULTICAST_SEGMENT = IPAddressBlock.parse("224.0.0.0/4")
+	RESERVED_SEGMENT = IPAddressBlock.parse("240.0.0.0/4")
+	
+	ZERO_CONF_SEGMENT = IPAddressBlock.parse("169.254.0.0/16")
+	
+	@staticmethod
+	def is_private(ip: IPAddress) -> bool:
+		return any(ip in block for block in ReservedIPBlocks.PRIVATE_IP_SEGMENTS)
+	
+	@staticmethod
+	def get_private_segment(ip: IPAddress) -> "Optional[IPAddressBlock]":
+		if not ReservedIPBlocks.is_private(ip):
+			raise ValueError("%s is not part of a private IP segment" % ip)
+
+		for block in ReservedIPBlocks.PRIVATE_IP_SEGMENTS:
+			if ip in block:
+				return block
+
+	@staticmethod
+	def is_localhost(ip: IPAddress) -> bool:
+		return ip in ReservedIPBlocks.LOCALHOST_SEGMENT
+	
+	@staticmethod
+	def is_multicast(ip: IPAddressBlock) -> bool:
+		return ip in ReservedIPBlocks.MULTICAST_SEGMENT
+	
+	@staticmethod
+	def is_reserved(ip: IPAddress) -> bool:
+		return ip in ReservedIPBlocks.RESERVED_SEGMENT
+	
+	@staticmethod
+	def is_zero_conf(ip: IPAddressBlock) -> bool:
+		return ip in ReservedIPBlocks.ZERO_CONF_SEGMENT
+

+ 203 - 0
code/ID2TLib/PcapAddressOperations.py

@@ -0,0 +1,203 @@
+from random import choice
+
+from Core import Statistics
+from ID2TLib.IPv4 import IPAddress
+
+is_ipv4 = IPAddress.is_ipv4
+
+class PcapAddressOperations():
+
+    def __init__(self, statistics: Statistics, uncertain_ip_mult: int=3):
+        """
+        Initializes a pcap information extractor that uses the provided statistics for its operations.
+
+        :param statistics: The statistics of the pcap file
+        :param uncertain_ip_mult: the mutliplier to create new address space when the remaining observed space has been drained
+        """
+        self.statistics = statistics
+        self.UNCERTAIN_IPSPACE_MULTIPLIER = uncertain_ip_mult
+        self._init_ipaddress_ops()
+
+    def get_probable_router_mac(self):
+        """
+        Returns the most probable router MAC address based on the most used MAC address in the statistics.
+        :return: the MAC address
+        """
+        self.probable_router_mac, count = self.statistics.process_db_query("most_used(macAddress)", print_results=False)[0]
+        return self.probable_router_mac     # and count as a measure of certainty?
+
+    def pcap_contains_priv_ips(self):
+        """
+        Returns if the provided traffic contains private IPs.
+        :return: True if the provided traffic contains private IPs, otherwise False
+        """
+        return self.contains_priv_ips
+
+    def get_local_address_range(self):
+        """
+        Returns a tuple with the start and end of the observed local IP range.
+        :return: The IP range as tuple
+        """
+        return str(self.min_local_ip), str(self.max_local_ip)
+
+    def get_count_rem_local_ips(self):
+        """
+        Returns the number of local IPs in the pcap file that have not aldready been returned by get_existing_local_ips.
+        :return: the not yet assigned local IPs
+        """
+        return len(self.remaining_local_ips)
+
+    def get_existing_local_ips(self, count: int=1):
+        """
+        Returns the given number of local IPs that are existent in the pcap file.
+
+        :param count: the number of local IPs to return
+        :return: the chosen local IPs
+        """
+
+        if count > len(self.remaining_local_ips):
+            print("Warning: There are no more {} local IPs in the .pcap file. Returning all remaining local IPs.".format(count))
+
+        total = min(len(self.remaining_local_ips), count)
+
+        retr_local_ips = []
+        local_ips = self.remaining_local_ips
+        for _ in range(0, total):
+            random_local_ip = choice(sorted(local_ips))
+            retr_local_ips.append(str(random_local_ip))
+            local_ips.remove(random_local_ip)
+
+        return retr_local_ips
+
+    def get_new_local_ips(self, count: int=1):
+        """
+        Returns in the pcap not existent local IPs that are in proximity of the observed local IPs. IPs can be returned
+        that are either between the minimum and maximum observed IP and are therefore considered certain
+        or that are above the observed maximum address, are more likely to not belong to the local network 
+        and are therefore considered uncertain.
+
+        :param count: the number of new local IPs to return
+        :return: the newly created local IP addresses
+        """
+
+        unused_local_ips = self.unused_local_ips
+        uncertain_local_ips = self.uncertain_local_ips
+        count_certain = min(count, len(unused_local_ips))
+        retr_local_ips = []
+
+        for _ in range(0, count_certain):
+            random_local_ip = choice(sorted(unused_local_ips))
+            retr_local_ips.append(str(random_local_ip))
+            unused_local_ips.remove(random_local_ip)
+
+        # retrieve uncertain local ips
+        if count_certain < count:
+            count_uncertain = count - count_certain
+
+            # check if new uncertain IPs have to be created
+            if len(uncertain_local_ips) < count_uncertain:
+                ipspace_multiplier = self.UNCERTAIN_IPSPACE_MULTIPLIER
+
+                max_new_ip = self.max_uncertain_local_ip.to_int() + ipspace_multiplier * count_uncertain
+
+                count_new_ips = max_new_ip - self.max_uncertain_local_ip.to_int()
+
+                # create ipspace_multiplier * count_uncertain new uncertain local IP addresses
+                last_gen_ip = None
+                for i in range(1, count_new_ips + 1):
+                    ip = IPAddress.from_int(self.max_uncertain_local_ip.to_int() + i)
+                    # exclude the definite broadcast address
+                    if self.priv_ip_segment:
+                        if ip.to_int() >= self.priv_ip_segment.last_address().to_int():
+                            break
+                    uncertain_local_ips.add(ip)
+                    last_gen_ip = ip
+                self.max_uncertain_local_ip = last_gen_ip
+
+            # choose the uncertain IPs to return
+            total_uncertain = min(count_uncertain, len(uncertain_local_ips))
+            for _ in range(0, total_uncertain):
+                random_local_ip = choice(sorted(uncertain_local_ips))
+                retr_local_ips.append(str(random_local_ip))
+                uncertain_local_ips.remove(random_local_ip)
+            
+        return retr_local_ips
+
+    def get_existing_external_ips(self, count: int=1):
+        """
+        Returns the given number of external IPs that are existent in the pcap file.
+
+        :param count: the number of external IPs to return
+        :return: the chosen external IPs
+        """
+
+        if not (len(self.external_ips) > 0):
+            print("Warning: .pcap does not contain any external ips.")
+            return []
+
+        total = min(len(self.remaining_external_ips), count)
+        retr_external_ips = []
+        external_ips = self.remaining_external_ips
+
+        for _ in range(0, total):
+            random_external_ip = choice(sorted(external_ips))
+            retr_external_ips.append(str(random_external_ip))
+            external_ips.remove(random_external_ip)
+
+        return retr_external_ips
+
+    def _init_ipaddress_ops(self):
+        """
+        Load and process data needed to perform functions on the IP addresses contained in the statistics
+        """
+
+        # retrieve local and external IPs
+        all_ips_str = set(self.statistics.process_db_query("all(ipAddress)", print_results=False))
+        external_ips_str = set(self.statistics.process_db_query("ipAddress(macAddress=%s)" % self.get_probable_router_mac(), print_results=False))  # including router
+        local_ips_str = all_ips_str - external_ips_str
+        external_ips = set()
+        local_ips = set()
+        self.contains_priv_ips = False
+        self.priv_ip_segment = None
+
+        # convert local IP strings to IPv4.IPAddress representation
+        for ip in local_ips_str:
+            if is_ipv4(ip):
+                ip = IPAddress.parse(ip)
+                if ip.is_private() and not self.contains_priv_ips:
+                    self.contains_priv_ips = True
+                    self.priv_ip_segment = ip.get_private_segment()
+                # exclude local broadcast address and other special addresses
+                if (not str(ip) == "255.255.255.255") and (not ip.is_localhost()) and (not ip.is_multicast()) and (not ip.is_reserved()) and (not ip.is_zero_conf()):
+                    local_ips.add(ip)
+
+        # convert external IP strings to IPv4.IPAddress representation
+        for ip in external_ips_str:
+            if is_ipv4(ip):
+                ip = IPAddress.parse(ip)
+                # if router MAC can definitely be mapped to local/private IP, add it to local_ips (because at first it is stored in external_ips, see above)
+                # this depends on whether the local network is identified by a private IP address range or not.
+                if ip.is_private():
+                    local_ips.add(ip)
+                # exclude local broadcast address and other special addresses
+                elif (not str(ip) == "255.255.255.255") and (not ip.is_localhost()) and (not ip.is_multicast()) and (not ip.is_reserved()) and (not ip.is_zero_conf()):
+                    external_ips.add(ip)
+
+        min_local_ip, max_local_ip = min(local_ips), max(local_ips)
+
+        # save the certain unused local IPs of the network
+        unused_local_ips = set()
+        for i in range(min_local_ip.to_int() + 1, max_local_ip.to_int()):
+            ip = IPAddress.from_int(i)
+            if not ip in local_ips:
+                unused_local_ips.add(ip)
+
+        # save the gathered information for efficient later use
+        self.external_ips = frozenset(external_ips)
+        self.remaining_external_ips = external_ips
+        self.min_local_ip, self.max_local_ip = min_local_ip, max_local_ip
+        self.max_uncertain_local_ip = max_local_ip
+        self.local_ips = frozenset(local_ips)
+        self.remaining_local_ips = local_ips
+        self.unused_local_ips = unused_local_ips
+        self.uncertain_local_ips = set()

+ 259 - 0
code/ID2TLib/Ports.py

@@ -0,0 +1,259 @@
+import random, copy
+
+
+# information taken from https://www.cymru.com/jtk/misc/ephemeralports.html
+class PortRanges:
+    # dynamic ports as listed by RFC 6056
+    DYNAMIC_PORTS = range(49152, 65536)
+
+    LINUX = range(32768, 61001)
+    FREEBSD = range(10000, 65536)
+
+    APPLE_IOS = DYNAMIC_PORTS
+    APPLE_OSX = DYNAMIC_PORTS
+
+    WINDOWS_7 = DYNAMIC_PORTS
+    WINDOWS_8 = DYNAMIC_PORTS
+    WINDOWS_VISTA = DYNAMIC_PORTS
+    WINDOWS_XP = range(1024, 5001)
+
+
+# This class uses classes instead of functions so deepcloning works
+class PortSelectionStrategy:
+    """
+    Collection of port-selection-strategies. Every strategy is a class whose instances are called with
+    the port range to choose from (plus optional further arguments) and return a port number.
+    """
+
+    class sequential:
+        """
+        Returns the ports of a range one after another and starts over once the end of the range is reached.
+        """
+
+        def __init__(self):
+            # -1 marks "no port handed out yet"; the first call starts at the start of the port range
+            self.counter = -1
+
+        # that function will always return a one higher counter than before,
+        # restarting from the start once it reached the highest value
+        def __call__(self, port_range, *args):
+            if self.counter == -1:
+                self.counter = port_range.start
+
+            port = self.counter
+
+            # advance for the next call; the stop-value itself is not part of the range
+            self.counter += 1
+            if self.counter == port_range.stop:
+                self.counter = port_range.start
+
+            return port
+
+    class random:
+        """
+        Returns a random port of the range on every call.
+        """
+
+        def __call__(self, port_range, *args):
+            # names of a class body are not visible inside methods, so "random" is the module here, not this class
+            return random.randrange(port_range.start, port_range.stop)
+
+    class linux_kernel:
+        """
+        A port-selection-strategy oriented on the linux-kernel
+        The implementation follows https://github.com/torvalds/linux/blob/master/net/ipv4/inet_connection_sock.c#L173
+        as much as possible when converting from one language to another (The newest file was used
+        by the time of writing, make sure you select the correct one when following the link!)
+        """
+
+        def __call__(self, port_range: range, port_selector, *args):
+            """
+            This method is an attempt to map a c-function to python. To solve the goto-problem
+            while-true's have been added. Both of the while-true's are placed where the original
+            had a label to jump to. break's and continue's are set to preserve the original
+            control flow. Another method could have been used to rewrite the c-code, however this
+            was chosen to preserve the similarity between this and the original
+
+            :param port_range: the port range to choose from
+            :param port_selector: the port selector that tells which ports are in use (via is_port_in_use)
+            :param args: Not used for now
+            :return: A port number
+            :raises ValueError: if no suitable port could be found
+            """
+            port = 0
+            low, high = port_range.start, port_range.stop
+
+            # this var tells us if we should use the upper or lower port-range-half, or the whole range if
+            # this var is None. The original was an enum of the values 0, 1 and 2. But I think an Optional[bool]
+            # is more clear
+            # None: use whole range, True: use lower half, False: use upper half
+            attempt_half = True
+
+            high += 1  # line 186 in the original file
+            while True:
+                # a range of less than 4 ports is not split into halves
+                if high - low < 4:
+                    attempt_half = None
+                if attempt_half is not None:
+                    # apparently a fast method to find a number close to the real half
+                    # unless the difference between high and low is 4 (see above, note the 2-shift below)
+                    # this does not work
+                    half = low + (((high - low) >> 2) << 1)
+
+                    # NOTE(review): low and high are not reset between two passes of this loop, so in the
+                    # second pass half is computed from the already narrowed interval - confirm this is intended
+                    if attempt_half:
+                        high = half
+                    else:
+                        low = half
+
+                remaining = high - low
+                if remaining > 1:
+                    remaining &= ~1  # clear the lowest bit, so remaining is even
+
+                offset = random.randrange(0, remaining)
+                offset |= 1;  # set the lowest bit, so offset is odd
+
+                attempt_half_before = attempt_half  # slight hack to keep track of change
+                while True:
+                    port = low + offset
+
+                    # walk through the candidates in steps of 2, wrapping around at the upper bound
+                    for i in range(0, remaining, 2):
+                        if port >= high:
+                            port -= remaining
+
+                        if port_selector.is_port_in_use(port):
+                            port += 2
+                            continue
+
+                        return port
+
+                    # no free port found with this offset: offset was odd, so it is even after the decrement
+                    # and the candidates are searched once more
+                    offset -= 1
+                    if not (offset & 1):
+                        continue
+
+                    # NOTE(review): if attempt_half is None or False this loop is only left by the return above,
+                    # offset keeps decreasing - confirm that this path terminates for an exhausted port range
+                    if attempt_half:
+                        attempt_half = False
+                        break
+
+                if attempt_half_before:  # we still got ports to search, attempt_half was just set to False
+                    continue
+                if not attempt_half:  # the port-range is exhausted
+                    break
+
+            raise ValueError("Could not find suitable port")
+
+
+class PortSelector:
+    """
+	This class simulates a port-selection-process. Instances keep a list of port-numbers they generated so
+	the same port-number will not be generated again.
+	"""
+
+    def __init__(self, port_range, select_function):
+        """
+		Create a PortSelector given a range of ports to choose from and a function that chooses the next port
+		
+		:param port_range: a range-object containing the range of ports to choose from
+		:param select_function: a function that receives the port_range and selects a port
+		"""
+
+        if len(port_range) == 0:
+            raise ValueError("cannot choose from an empty range")
+        if port_range.start not in range(1, 65536) or port_range.stop not in range(1, 65536 + 1):
+            raise ValueError("port_range is no subset of the valid port-range")
+
+        self.port_range = port_range
+
+        self._select_port = select_function
+
+        self.generated = []
+
+    def select_port(self):
+        # do this check to avoid endless loops
+        if len(self.generated) == len(self.port_range):
+            raise RuntimeError(
+                "All %i port numbers were already generated, no more can be generated" % len(self.port_range))
+
+        while True:
+            port = self._select_port(self.port_range, self)
+
+            if port not in self.generated:
+                self.generated.append(port)
+                return port
+
+    def is_port_in_use(self, port: int):
+        return port in self.generated
+
+    def undo_port_use(self, port: int):
+        if port in self.generated:
+            self.generated.remove(port)
+        else:
+            raise ValueError("Port %i is not in use and thus can not be undone" % port)
+
+    def reduce_size(self, size: int):
+        """
+		Reduce the list of already generated ports to the last <size> generated.
+		If size if bigger than the number of generated ports nothing happens.
+		"""
+        self.generated = self.generated[-size:]
+
+    def clear(self):
+        """
+		Clear the list of generated ports. As of now this does not reset the state of the selection-function
+		"""
+        self.generated = []
+
+    def clone(self):
+        return copy.deepcopy(self)
+
+
+class ProtocolPortSelector:
+    """
+	This class contains a method to select ports for udp and tcp. It generally consists of the port-selectors, one
+	for tcp and one for udp. For convenience this class has a __getattr__-method to call methods on both selectors
+	at once. E.g, clear() does not exist for ProtocolPortSelector but it does for PortSelector, therefore
+	protocolPortSelector.clear() will call clear for both port-selectors.
+	"""
+
+    def __init__(self, port_range, select_tcp, select_udp=None):
+        self.tcp = PortSelector(port_range, select_tcp)
+        self.udp = PortSelector(port_range, select_udp or select_tcp)
+
+    def get_tcp_generator(self):
+        return self.tcp
+
+    def get_udp_generator(self):
+        return self.udp
+
+    def select_port_tcp(self):
+        return self.tcp.select_port()
+
+    def select_port_udp(self):
+        return self.udp.select_port()
+
+    def is_port_in_use_tcp(self, port):
+        return self.tcp.is_port_in_use(port)
+
+    def is_port_in_use_udp(self, port):
+        return self.udp.is_port_in_use(port)
+
+    def clone(self):
+        class Tmp: pass
+
+        clone = Tmp()
+        clone.__class__ = type(self)
+
+        clone.udp = self.udp.clone()
+        clone.tcp = self.tcp.clone()
+
+        return clone
+
+    def __getattr__(self, attr):
+        val = getattr(self.tcp, attr)
+
+        if callable(val):  # we proprably got a method here
+            tcp_meth = val
+            udp_meth = getattr(self.udp, attr)
+
+            def double_method(*args, **kwargs):
+                return (tcp_meth(*args, **kwargs), udp_meth(*args, **kwargs))
+
+            return double_method  # calling this function will call the method for both port-selectors
+        else:  # we have found a simple value, return a tuple containing the attribute-value from both port-selectors
+            return (val, getattr(self.udp, attr))
+
+
+class PortSelectors:
+    """
+	To save some time this class contains some of the port-selection-strategies found in the wild. It is recommend to use
+	.clone() to get your personal copy, otherwise two parts of your code might select ports on the same port-selector which
+	is something your might want to avoid.
+	"""
+    LINUX = ProtocolPortSelector(PortRanges.LINUX, PortSelectionStrategy.random())
+    APPLE = ProtocolPortSelector(PortRanges.DYNAMIC_PORTS,
+                                 PortSelectionStrategy.sequential(),
+                                 PortSelectionStrategy.random())
+    FREEBSD = ProtocolPortSelector(PortRanges.FREEBSD, PortSelectionStrategy.random())
+    WINDOWS = ProtocolPortSelector(PortRanges.WINDOWS_7,
+                                   PortSelectionStrategy.random())  # the selection strategy is a guess as i can't find more info on it

BIN
resources/telnet-raw.pcap