
Merge branch 'develop' of https://git.tk.informatik.tu-darmstadt.de/leon.boeck/ID2T-toolkit-BotnetTraffic into develop

Denis Waßmann 6 years ago
parent
commit
37f80c09b8

+ 35 - 69
code/Attack/MembersMgmtCommAttack.py

@@ -62,6 +62,7 @@ class Message():
 
 
 from ID2TLib import FileUtils, Generator
+from ID2TLib.IPv4 import IPAddress
 from ID2TLib.PcapAddressOperations import PcapAddressOperations
 from ID2TLib.CommunicationProcessor import CommunicationProcessor
 from ID2TLib.Botnet.MessageMapping import MessageMapping
@@ -319,30 +320,6 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
                     bot_configs[bot]["TTL"] = source_ttl_prob_dict.random()
 
 
-
-
-
-        def add_delay(timestamp: float, minDelay: float, delay: float):
-            '''
-            Adds delay to a timestamp, with a minimum value of minDelay. But usually a value close to delay
-            :param timestamp: the timestamp that is to be increased
-            :param minDelay: the minimum value that is to be added to the timestamp
-            :param delay: The general size of the delay. Statistically speaking: the expected value
-            :return: the updated timestamp
-            '''
-
-            randomdelay = Lea.fromValFreqsDict({0.15*delay: 7, 0.3*delay: 10, 0.7*delay:20,
-                                delay:33, 1.2*delay:20, 1.6*delay: 10, 1.9*delay: 7, 2.5*delay: 3, 4*delay: 1})
-            if 0.1*delay < minDelay:
-                print("Warning: minDelay probably too big when computing time_stamps")
-
-            # updated timestamps consist of the sum of the minimum delay, the magnitude of the delay
-            # and a deviation by up to 10% in order to guarantee uniqueness
-            general_offset = randomdelay.random()
-            unique_offset = uniform(-0.1*general_offset, 0.1*general_offset)
-            return timestamp + minDelay + general_offset + unique_offset
-
-
         def move_xml_to_outdir(filepath_xml: str):
             """
             Moves the XML file at filepath_xml to the output directory of the PCAP
@@ -434,62 +411,51 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
             add_ids_to_config(sorted(external_ids), existing_external_ips, new_external_ips, bot_configs, idtype="external", router_mac=router_mac)
 
         #### Set realistic timestamps for messages ####
-        most_used_ip_address = self.statistics.get_most_used_ip_address()
-        minDelay = self.get_reply_delay(most_used_ip_address)[0]
-        next_timestamp = self.get_param_value(Param.INJECT_AT_TIMESTAMP)
-        pcap_duration = float(self._get_capture_duration())
-        equi_timeslice = pcap_duration/len(messages)
-
-        # Dict, takes a tuple of 2 Bot_IDs as a key (ID with lower number first), returns the time when the Hello_reply came in
-        hello_times = {}
-        # msg_IDs with already updated timestamps
+
+        # This is the timestamp at which the first packet should be injected: the packets have to be shifted to
+        # the beginning of the pcap file (INJECT_AT_TIMESTAMP) and the original offset of the packets has to be
+        # compensated so that they start at the given point in time.
+        zero_reference = self.get_param_value(Param.INJECT_AT_TIMESTAMP) - messages[0].time
+
         updated_msgs = []
+        last_response = {}      # dict keyed by a tuple of two bot IDs (requester, responder); maps to the time of the last response the requester received.
+                                # Necessary to make sure that additional requests are sent only after the response to the previous one was received.
+        for msg in messages:    # init
+            last_response[(msg.src, msg.dst)] = -1
+
+        # calculate the average delay values for local and external responses
+        avg_delay_local, avg_delay_external = self.statistics.get_avg_delay_local_ext()
 
+        # update all timestamps
         for req_msg in messages:
-            updated = 0
-            if(req_msg.msg_id in updated_msgs):
+
+            if req_msg in updated_msgs:
                 # message already updated
                 continue
 
-            if(req_msg.msg_id == -1):
-                # message has no corresponding request/response
-                req_msg.time = next_timestamp
-                next_timestamp = add_delay(next_timestamp, minDelay, equi_timeslice)
-                updated_msgs.append(req_msg.msg_id)
-                continue
+            # if req_msg.time would fall before the response to the last request, req_msg has to be sent later (else branch)
+            if last_response[(req_msg.src, req_msg.dst)] == -1 or last_response[(req_msg.src, req_msg.dst)] < (zero_reference + req_msg.time - 0.05):
+                # update req_msg timestamp with a variation of up to 50 ms
+                req_msg.time = zero_reference + req_msg.time + uniform(-0.05, 0.05)
+                updated_msgs.append(req_msg)
 
+            else:
+                req_msg.time = last_response[(req_msg.src, req_msg.dst)] + 0.06 + uniform(-0.05, 0.05)
 
-            elif req_msg.type != MessageType.SALITY_HELLO:
-                # Hello messages must have preceded, so make sure the timestamp of this msg is after the HELLO_REPLY
-                if int(req_msg.src) < int(req_msg.dst):
-                    hello_time = hello_times[(req_msg.src, req_msg.dst)]
-                else:
-                    hello_time = hello_times[(req_msg.dst, req_msg.src)]
-
-                if next_timestamp < hello_time:
-                    # use the time of the hello_reply instead of next_timestamp to update this pair of messages
-                    post_hello = add_delay(hello_time, minDelay, equi_timeslice)
-                    respns_msg = messages[req_msg.refer_msg_id]
-                    respns_msg.time = add_delay(post_hello, minDelay, equi_timeslice)
-                    req_msg.time = post_hello
-                    updated = 1
-
-            if not updated:
-                # update normally
+            # update response if necessary
+            if req_msg.refer_msg_id != -1:
                 respns_msg = messages[req_msg.refer_msg_id]
-                respns_msg.time = add_delay(next_timestamp, minDelay, equi_timeslice)
-                req_msg.time = next_timestamp
-                next_timestamp = add_delay(next_timestamp, minDelay, equi_timeslice)
 
-            updated_msgs.append(req_msg.msg_id)
-            updated_msgs.append(req_msg.refer_msg_id)
-
-            if req_msg.type == MessageType.SALITY_HELLO:
-                # if hello messages have been exchanged, save timestamp of the HELLO_REPLY
-                if int(req_msg.src) < int(req_msg.dst):
-                    hello_times[(req_msg.src, req_msg.dst)] = respns_msg.time
+                # check for local or external communication and update response timestamp with the respective avg delay
+                if req_msg.src in external_ids or req_msg.dst in external_ids:
+                    # external communication
+                    respns_msg.time = req_msg.time + avg_delay_external + uniform(-0.1*avg_delay_external, 0.1*avg_delay_external)
+
                 else:
-                    hello_times[(req_msg.dst, req_msg.src)] = respns_msg.time
+                    # local communication
+                    respns_msg.time = req_msg.time + avg_delay_local + uniform(-0.1*avg_delay_local, 0.1*avg_delay_local)
+
+                updated_msgs.append(respns_msg)
+                last_response[(req_msg.src, req_msg.dst)] = respns_msg.time
 
         # create port configurations for the bots
         for bot in bot_configs:
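
For reference, the new timing model in isolation: each request is shifted by the fixed zero_reference and jittered by up to 50 ms, but is postponed if the response to the previous request between the same pair has not arrived yet; each response trails its request by the measured average local or external delay, with a deviation of up to 10%. Below is a minimal, self-contained sketch of that logic, assuming a simple Msg stand-in and fixed average delays (the real code uses the Message class and values from the statistics database):

    import random

    class Msg:
        """Stand-in for the Message class; only the fields used here."""
        def __init__(self, src, dst, time, refer_msg_id=-1):
            self.src, self.dst, self.time, self.refer_msg_id = src, dst, time, refer_msg_id

    def retime(messages, inject_at, avg_delay_local, avg_delay_external, external_ids=frozenset()):
        zero_reference = inject_at - messages[0].time   # shift the first packet to the injection timestamp
        last_response = {(m.src, m.dst): -1 for m in messages}
        updated = set()
        for req in messages:
            if id(req) in updated:                      # responses were already re-timed with their request
                continue
            prev = last_response[(req.src, req.dst)]
            if prev == -1 or prev < zero_reference + req.time - 0.05:
                req.time = zero_reference + req.time + random.uniform(-0.05, 0.05)
            else:                                       # postpone until after the previous response arrived
                req.time = prev + 0.06 + random.uniform(-0.05, 0.05)
            updated.add(id(req))
            if req.refer_msg_id != -1:                  # re-time the matching response, if any
                resp = messages[req.refer_msg_id]
                delay = avg_delay_external if (req.src in external_ids or req.dst in external_ids) else avg_delay_local
                resp.time = req.time + delay + random.uniform(-0.1 * delay, 0.1 * delay)
                updated.add(id(resp))
                last_response[(req.src, req.dst)] = resp.time

    msgs = [Msg("1", "2", 0.0, refer_msg_id=1), Msg("2", "1", 0.1)]
    retime(msgs, inject_at=1500000000.0, avg_delay_local=0.001, avg_delay_external=0.12)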

+ 1 - 2
code/ID2TLib/AttackController.py

@@ -4,7 +4,6 @@ import sys
 from Attack.AttackParameters import Parameter
 from ID2TLib import LabelManager
 from ID2TLib import Statistics
-from ID2TLib.OldLibs.Label import Label
 from ID2TLib.PcapFile import PcapFile
 
 
@@ -85,7 +84,7 @@ class AttackController:
         print("done. (total: " + str(total_packets) + " pkts.)")
 
         # Store label into LabelManager
-        l = Label(attack, self.get_attack_start_utime(),
+        l = LabelManager.Label(attack, self.get_attack_start_utime(),
                   self.get_attack_end_utime(), attack_note)
         self.label_mgr.add_labels(l)
 

+ 248 - 0
code/ID2TLib/OtherGroupLib/Controller.py

@@ -0,0 +1,248 @@
+import os
+import sys
+import readline
+import sqlite3
+
+from ID2TLib.AttackController import AttackController
+from ID2TLib.LabelManager import LabelManager
+from ID2TLib.PcapFile import PcapFile
+from ID2TLib.Statistics import Statistics
+
+
+class Controller:
+    def __init__(self, pcap_file_path: str, do_extra_tests: bool):
+        """
+        Creates a new Controller, acting as a central coordinator for the whole application.
+        :param pcap_file_path:
+        """
+        # Fields
+        self.pcap_src_path = pcap_file_path.strip()
+        self.pcap_dest_path = ''
+        self.written_pcaps = []
+        self.do_extra_tests = do_extra_tests
+
+        # Initialize class instances
+        print("Input file: %s" % self.pcap_src_path)
+        self.pcap_file = PcapFile(self.pcap_src_path)
+        self.label_manager = LabelManager(self.pcap_src_path)
+        self.statistics = Statistics(self.pcap_file)
+        self.statistics.do_extra_tests = self.do_extra_tests
+        self.statisticsDB = self.statistics.get_statistics_database()
+        self.attack_controller = AttackController(self.pcap_file, self.statistics, self.label_manager)
+
+    def load_pcap_statistics(self, flag_write_file: bool, flag_recalculate_stats: bool, flag_print_statistics: bool):
+        """
+        Loads the PCAP statistics either from the database, if the statistics were calculated earlier, or calculates
+        the statistics and creates a new database.
+        :param flag_write_file: Writes the statistics to a file.
+        :param flag_recalculate_stats: Forces the recalculation of statistics.
+        :param flag_print_statistics: Prints the statistics on the terminal.
+        :return: None
+        """
+        self.statistics.load_pcap_statistics(flag_write_file, flag_recalculate_stats, flag_print_statistics)
+
+    def process_attacks(self, attacks_config: list):
+        """
+        Creates the attack based on the attack name and the attack parameters given in the attacks_config. The
+        attacks_config is a list of attacks, e.g.
+        [['PortscanAttack', 'ip.src="192.168.178.2"', 'dst.port=80'], ['PortscanAttack', 'ip.src="10.10.10.2"']].
+        Merges the individual temporary attack pcaps into one single pcap and merges this single pcap with the
+        input dataset.
+        :param attacks_config: A list of attacks with their attack parameters.
+        """
+        # load attacks sequentially
+        for attack in attacks_config:
+            temp_attack_pcap = self.attack_controller.process_attack(attack[0], attack[1:])
+            self.written_pcaps.append(temp_attack_pcap)
+
+        # merge attack pcaps to get single attack pcap
+        if len(self.written_pcaps) > 1:
+            print("\nMerging temporary attack pcaps into single pcap file...", end=" ")
+            sys.stdout.flush()  # force python to print text immediately
+            for i in range(0, len(self.written_pcaps) - 1):
+                attacks_pcap = PcapFile(self.written_pcaps[i])
+                attacks_pcap_path = attacks_pcap.merge_attack(self.written_pcaps[i + 1])
+                os.remove(self.written_pcaps[i + 1])  # remove merged pcap
+                self.written_pcaps[i + 1] = attacks_pcap_path
+            print("done.")
+        else:
+            attacks_pcap_path = self.written_pcaps[0]
+
+        # merge single attack pcap with all attacks into base pcap
+        print("Merging base pcap with single attack pcap...", end=" ")
+        sys.stdout.flush()  # force python to print text immediately
+        self.pcap_dest_path = self.pcap_file.merge_attack(attacks_pcap_path)
+        print("done.")
+
+        # delete intermediate PCAP files
+        print('Deleting intermediate attack pcap...', end=" ")
+        sys.stdout.flush()  # force python to print text immediately
+        os.remove(attacks_pcap_path)
+        print("done.")
+
+        # write label file with attacks
+        self.label_manager.write_label_file(self.pcap_dest_path)
+
+        # print status message
+        print('\nOutput files created: \n', self.pcap_dest_path, '\n', self.label_manager.label_file_path)
+
+    def process_db_queries(self, query, print_results=False):
+        """
+        Processes a statistics database query. This can be a standard SQL query or a named query.
+        :param query: The query as a string or multiple queries as a list of strings.
+        :param print_results: Must be True if the results should be printed to terminal.
+        :return: The query's result
+        """
+        print("Processing database query/queries...")
+        if isinstance(query, list) or isinstance(query, tuple):
+            for q in query:
+                self.statisticsDB.process_db_query(q, print_results)
+        else:
+            self.statisticsDB.process_db_query(query, print_results)
+
+    @staticmethod
+    def process_help(params):
+        if not params:
+            print("Query mode allows you to enter SQL-queries as well as named queries.")
+            print()
+            print("Named queries:")
+            print("\tSelectors:")
+            print("\t\tmost_used(...)  -> Returns the most frequently occurring element of all elements")
+            print("\t\tleast_used(...) -> Returns the least frequently occurring element of all elements")
+            print("\t\tavg(...)        -> Returns the average of all elements")
+            print("\t\tall(...)        -> Returns all elements")
+            print("\tExtractors:")
+            print("\t\trandom(...)     -> Returns a random element from a list")
+            print("\t\tfirst(...)      -> Returns the first element from a list")
+            print("\t\tlast(...)       -> Returns the last element from a list")
+            print("\tParameterized selectors:")
+            print("\t\tipAddress(...)  -> Returns all IP addresses fulfilling the specified conditions")
+            print("\t\tmacAddress(...) -> Returns all MAC addresses fulfilling the specified conditions")
+            print()
+            print("Miscellaneous:")
+            print("\tlabels            -> List all attacks listed in the label file, if any")
+            print()
+            print("Additional information is available with 'help [KEYWORD];'")
+            print("To get a list of examples, type 'help examples;'")
+            print()
+            return
+
+        param = params[0].lower()
+        if param == "most_used":
+            print("most_used can be used as a selector for the following attributes:")
+            print("ipAddress | macAddress | portNumber | protocolName | ttlValue | mssValue | winSize | ipClass")
+            print()
+        elif param == "least_used":
+            print("least_used can be used as a selector for the following attributes:")
+            print("ipAddress | macAddress | portNumber | protocolName | ttlValue")
+            print()
+        elif param == "avg":
+            print("avg can be used as a selector for the following attributes:")
+            print("pktsReceived | pktsSent | kbytesSent | kbytesReceived | ttlValue | mss")
+            print()
+        elif param == "all":
+            print("all can be used as a selector for the following attributes:")
+            print("ipAddress | ttlValue | mss | macAddress | portNumber | protocolName")
+            print()
+        elif param in ["random", "first", "last"]:
+            print("No additional info available for this keyword.")
+            print()
+        elif param == "ipaddress":
+            print("ipAddress is a parameterized selector which fetches IP addresses based on (a list of) conditions.")
+            print("Conditions are of the following form: PARAMETER OPERATOR VALUE")
+            print("The following parameters can be specified:")
+            print("pktsReceived | pktsSent | kbytesReceived | kbytesSent | maxPktRate | minPktRate | ipClass\n"
+                  "macAddress | ttlValue | ttlCount | portDirection | portNumber | portCount | protocolCount\n"
+                  "protocolName")
+            print()
+            print("See 'help examples;' for usage examples.")
+            print()
+        elif param == "macaddress":
+            print("macAddress is a parameterized selector which fetches MAC addresses based on (a list of) conditions.")
+            print("Conditions are of the following form: PARAMETER OPERATOR VALUE")
+            print("The following parameters can be specified:")
+            print("ipAddress")
+            print()
+            print("See 'help examples;' for usage examples.")
+            print()
+        elif param == "examples":
+            print("Get the average amount of sent packets per IP:")
+            print("\tavg(pktsSent);")
+            print("Get a random IP from all addresses occurring in the pcap:")
+            print("\trandom(all(ipAddress));")
+            print("Return the MAC address of a specified IP:")
+            print("\tmacAddress(ipAddress=192.168.178.2);")
+            print("Get the average TTL-value with SQL:")
+            print("\tSELECT avg(ttlValue) from ip_ttl;")
+            print("Get a random IP address from all addresses that sent and received at least 10 packets:")
+            print("\trandom(ipAddress(pktsSent > 10, pktsReceived > 10));")
+            print()
+        else:
+            print("Unknown keyword '" + param + "', try 'help;' to get a list of allowed keywords")
+            print()
+
+    def enter_query_mode(self):
+        """
+        Enters into the query mode. This is a read-eval-print-loop, where the user can input named queries or SQL
+        queries and the results are printed.
+        """
+
+        def make_completer(vocabulary):
+            def custom_template(text, state):
+                results = [x for x in vocabulary if x.startswith(text)] + [None]
+                return results[state]
+            return custom_template
+
+        readline.parse_and_bind('tab: complete')
+        readline.set_completer(make_completer(self.statisticsDB.get_all_named_query_keywords()+self.statisticsDB.get_all_sql_query_keywords()))
+        history_file = os.path.join(os.path.expanduser('~'), 'ID2T_data', 'query_history')
+        try:
+            readline.read_history_file(history_file)
+        except IOError:
+            pass
+        print("Entering into query mode...")
+        print("Enter statement ending by ';' and press ENTER to send query. Exit by sending an empty query.")
+        print("Type 'help;' for information on possible queries.")
+        buffer = ""
+        while True:
+            line = input("> ")
+            if line == "":
+                break
+            buffer += line
+            if sqlite3.complete_statement(buffer):
+                try:
+                    buffer = buffer.strip()
+                    if buffer.lower().startswith('help'):
+                        buffer = buffer.strip(';')
+                        self.process_help(buffer.split(' ')[1:])
+                    elif buffer.lower().strip() == 'labels;':
+                        if not self.label_manager.labels:
+                            print("No labels found.")
+                        else:
+                            print("Attacks listed in the label file:")
+                            print()
+                            for label in self.label_manager.labels:
+                                print("Attack name:     " + str(label.attack_name))
+                                print("Attack note:     " + str(label.attack_note))
+                                print("Start timestamp: " + str(label.timestamp_start))
+                                print("End timestamp:   " + str(label.timestamp_end))
+                                print()
+                        print()
+                    else:
+                        self.statisticsDB.process_db_query(buffer, True)
+                except sqlite3.Error as e:
+                    print("An error occurred:", e.args[0])
+                buffer = ""
+
+        readline.set_history_length(1000)
+        readline.write_history_file(history_file)
+
+    def create_statistics_plot(self, params: str):
+        """
+        Plots the statistics to a file by using the given customization parameters.
+        """
+        if params is not None and params[0] is not None:
+            params_dict = dict([z.split("=") for z in params])
+            self.statistics.plot_statistics(format=params_dict['format'])
+        else:
+            self.statistics.plot_statistics()
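
For orientation, a hedged driver sketch for this Controller; the input path and the query are placeholders, and the module path is assumed to match this commit:

    from ID2TLib.OtherGroupLib.Controller import Controller

    controller = Controller("/tmp/capture.pcap", do_extra_tests=False)  # hypothetical input file
    controller.load_pcap_statistics(flag_write_file=False, flag_recalculate_stats=False, flag_print_statistics=True)
    controller.process_db_queries("most_used(ipAddress)", print_results=True)
    controller.enter_query_mode()  # interactive REPL with tab completion and query history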

+ 32 - 0
code/ID2TLib/OtherGroupLib/Label.py

@@ -0,0 +1,32 @@
+from functools import total_ordering
+
+
+@total_ordering
+class Label:
+    def __init__(self, attack_name, timestamp_start, timestamp_end, attack_note=""):
+        """
+        Creates a new attack label
+
+        :param attack_name: The name of the associated attack
+        :param timestamp_start: The timestamp as unix time of the first attack packet
+        :param timestamp_end: The timestamp as unix time of the last attack packet
+        :param attack_note: A note associated to the attack (optional)
+        """
+        self.attack_name = attack_name
+        self.timestamp_start = timestamp_start
+        self.timestamp_end = timestamp_end
+        self.attack_note = attack_note
+
+    def __eq__(self, other):
+        return self.timestamp_start == other.timestamp_start
+
+    def __lt__(self, other):
+        return self.timestamp_start < other.timestamp_start
+
+    def __gt__(self, other):
+        return self.timestamp_start > other.timestamp_start
+
+    def __str__(self):
+        return ''.join(
+            ['(', self.attack_name, ',', self.attack_note, ',', str(self.timestamp_start), ',', str(self.timestamp_end),
+             ')'])
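
Because @total_ordering derives the remaining comparisons from __eq__ and __lt__, labels sort chronologically by timestamp_start; a small usage sketch with made-up timestamps:

    from ID2TLib.OtherGroupLib.Label import Label  # module path assumed from this commit

    a = Label("PortscanAttack", 1500000000.0, 1500000060.0)
    b = Label("DDoSAttack", 1400000000.0, 1400000300.0, "test run")
    assert sorted([a, b])[0] is b  # earlier timestamp_start sorts first
    print(a)                       # -> (PortscanAttack,,1500000000.0,1500000060.0)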

+ 169 - 0
code/ID2TLib/OtherGroupLib/LabelManager.py

@@ -0,0 +1,169 @@
+import os.path
+from datetime import datetime
+from xml.dom.minidom import *
+
+import ID2TLib.Label as Label
+
+
+class LabelManager:
+    TAG_ROOT = 'LABELS'
+    TAG_ATTACK = 'attack'
+    TAG_ATTACK_NAME = 'attack_name'
+    TAG_ATTACK_NOTE = 'attack_note'
+    TAG_TIMESTAMP_START = 'timestamp_start'
+    TAG_TIMESTAMP_END = 'timestamp_end'
+    TAG_TIMESTAMP = 'timestamp'
+    TAG_TIMESTAMP_HR = 'timestamp_hr'
+    ATTR_VERSION = 'version_parser'
+
+    # update this attribute if XML scheme was modified
+    ATTR_VERSION_VALUE = '0.2'
+
+    def __init__(self, filepath_pcap=None):
+        """
+        Creates a new LabelManager for managing the attack's labels.
+
+        :param filepath_pcap: The path to the PCAP file associated to the labels.
+        """
+        self.labels = list()
+
+        if filepath_pcap is not None:
+            self.label_file_path = os.path.splitext(filepath_pcap)[0] + '_labels.xml'
+            # only load labels if label file is existing
+            if os.path.exists(self.label_file_path):
+                self.load_labels()
+
+    def add_labels(self, labels):
+        """
+        Adds a label to the internal list of labels.
+
+        :param labels: The labels to be added
+        """
+        if isinstance(labels, list):
+            self.labels = self.labels + [labels]
+        elif isinstance(labels, tuple):
+            for l in labels:
+                self.labels.append(l)
+        else:
+            self.labels.append(labels)
+
+        # sorts the labels ascending by their timestamp
+        self.labels.sort()
+
+    def write_label_file(self, filepath=None):
+        """
+        Writes previously added/loaded labels to an XML file. Uses the given filepath as destination path; if no path is
+        given, uses the path in label_file_path.
+
+        :param filepath: The path where the label file should be written to.
+        """
+
+        def get_subtree_timestamp(xml_tag_root, timestamp_entry):
+            """
+            Creates the subtree for a given timestamp, consisting of the unix time format (seconds) and a human-readable
+            output.
+
+            :param xml_tag_root: The tag name for the root of the subtree
+            :param timestamp_entry: The timestamp as unix time
+            :return: The root node of the XML subtree
+            """
+            timestamp_root = doc.createElement(xml_tag_root)
+
+            # add timestamp in unix format
+            timestamp = doc.createElement(self.TAG_TIMESTAMP)
+            timestamp.appendChild(doc.createTextNode(str(timestamp_entry)))
+            timestamp_root.appendChild(timestamp)
+
+            # add timestamp in human-readable format
+            timestamp_hr = doc.createElement(self.TAG_TIMESTAMP_HR)
+            timestamp_hr_text = datetime.fromtimestamp(timestamp_entry).strftime('%Y-%m-%d %H:%M:%S.%f')
+            timestamp_hr.appendChild(doc.createTextNode(timestamp_hr_text))
+            timestamp_root.appendChild(timestamp_hr)
+
+            return timestamp_root
+
+        if filepath is not None:
+            self.label_file_path = os.path.splitext(filepath)[0] + '_labels.xml'
+
+        # Generate XML
+        doc = Document()
+        node = doc.createElement(self.TAG_ROOT)
+        node.setAttribute(self.ATTR_VERSION, self.ATTR_VERSION_VALUE)
+        for label in self.labels:
+            xml_tree = doc.createElement(self.TAG_ATTACK)
+
+            # add attack to XML tree
+            attack_name = doc.createElement(self.TAG_ATTACK_NAME)
+            attack_name.appendChild(doc.createTextNode(str(label.attack_name)))
+            xml_tree.appendChild(attack_name)
+            attack_note = doc.createElement(self.TAG_ATTACK_NOTE)
+            attack_note.appendChild(doc.createTextNode(str(label.attack_note)))
+            xml_tree.appendChild(attack_note)
+
+            # add timestamp_start to XML tree
+            xml_tree.appendChild(get_subtree_timestamp(self.TAG_TIMESTAMP_START, label.timestamp_start))
+
+            # add timestamp_end to XML tree
+            xml_tree.appendChild(get_subtree_timestamp(self.TAG_TIMESTAMP_END, label.timestamp_end))
+
+            node.appendChild(xml_tree)
+
+        doc.appendChild(node)
+
+        # Write XML to file
+        file = open(self.label_file_path, 'w')
+        file.write(doc.toprettyxml())
+        file.close()
+
+    def load_labels(self):
+        """
+        Loads the labels from an already existing label XML file located at label_file_path (set by constructor).
+
+        """
+
+        def get_value_from_node(node, tag_name, *child_number):
+            """
+            Returns the value located in the tag specified by tag_name from a given node. To do so, walks the
+            node's children as indicated by child_number, e.g., child_number = (1, 2) first goes to the 1st child and
+            then to the 2nd child of that child -> elem.childNodes[1].childNodes[2].
+            """
+            elem = node.getElementsByTagName(tag_name)
+            if len(elem) == 1:
+                elem = elem[0]
+                for c in child_number:
+                    if len(elem.childNodes) > 0:
+                        elem = elem.childNodes[c]
+                    else:
+                        return ""
+                return elem.data
+            else:
+                return ""
+
+        print("Label file found. Loading labels...")
+        try:
+            dom = parse(self.label_file_path)
+        except Exception:
+            print('ERROR: Provided label file could not be parsed. Ignoring label file')
+            return
+
+        # Check if version of parser and version of file match
+        version = dom.getElementsByTagName(self.TAG_ROOT)
+        if len(version) > 0:
+            version = version[0].getAttribute(self.ATTR_VERSION)
+            if version != self.ATTR_VERSION_VALUE:
+                print(
+                    "The file " + self.label_file_path + " was created by another version of ID2TLib.LabelManager. Ignoring label file.")
+
+        # Parse attacks from XML file
+        attacks = dom.getElementsByTagName(self.TAG_ATTACK)
+        count_labels = 0
+        for a in attacks:
+            attack_name = get_value_from_node(a, self.TAG_ATTACK_NAME, 0)
+            attack_note = get_value_from_node(a, self.TAG_ATTACK_NOTE, 0)
+            timestamp_start = get_value_from_node(a, self.TAG_TIMESTAMP_START, 1, 0)
+            timestamp_end = get_value_from_node(a, self.TAG_TIMESTAMP_END, 1, 0)
+            label = Label.Label(attack_name, float(timestamp_start), float(timestamp_end), attack_note)
+            self.labels.append(label)
+            count_labels += 1
+
+        print("Read " + str(count_labels) + " label(s) successfully.")
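
A round-trip sketch under the assumption of a writable, hypothetical path; the constructor only loads labels if the label XML file already exists:

    from ID2TLib.OtherGroupLib.LabelManager import LabelManager  # module path assumed from this commit
    from ID2TLib.OtherGroupLib.Label import Label

    mgr = LabelManager("/tmp/capture.pcap")        # hypothetical pcap path
    mgr.add_labels(Label("PortscanAttack", 1500000000.0, 1500000060.0))
    mgr.write_label_file()                         # writes /tmp/capture_labels.xml
    reloaded = LabelManager("/tmp/capture.pcap")   # now finds the XML and loads the label back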

+ 45 - 0
code/ID2TLib/OtherGroupLib/SMB2.py

@@ -0,0 +1,45 @@
+from scapy.packet import *
+from scapy.fields import *
+from scapy.layers.netbios import NBTSession
+
+
+class SMB2_SYNC_Header(Packet):
+    name = "SMB2 Negotiate Protocol Response Header"
+    fields_desc = [StrFixedLenField("Start","\xfeSMB", 4),
+                   LEShortField("StructureSize", 64),
+                   LEShortField("CreditCharge", 0),
+                   LEIntField("Status", 0),
+                   LEShortField("Command", 0),
+                   LEShortField("CreditResponse", 0),
+                   LEIntField("Flags", 0),
+                   LEIntField("NextCommand", 0),
+                   LELongField("MessageID", 0),
+                   LEIntField("Reserved", 0),
+                   LEIntField("TreeID", 0x0),
+                   LELongField("SessionID", 0),
+                   LELongField("Signature1", 0),
+                   LELongField("Signature2", 0)]
+
+
+# No support of Security Buffer, Padding or Dialect Revision 0x0311
+class SMB2_Negotiate_Protocol_Response(Packet):
+    name = "SMB2 Negotiate Protocol Response"
+    fields_desc = [LEShortField("StructureSize", 65),
+                   LEShortField("SecurityMode", 0),
+                   LEShortField("DialectRevision", 0x0),
+                   LEShortField("NegotiateContentCount/Reserved", 0),
+                   StrFixedLenField("ServerGuid", "", 16),
+                   LEIntField("Capabilities", 0),
+                   LEIntField("MaxTransactSize", 0),
+                   LEIntField("MaxReadSize", 0),
+                   LEIntField("MaxWriteSize", 0),
+                   LELongField("SystemTime", 0),
+                   LELongField("ServerStartTime", 0),
+                   LEShortField("SecurityBufferOffset", 0),
+                   LEShortField("SecurityBufferLength", 0),
+                   StrLenField("SecurityBlob", "", length_from=lambda x: x.ByteCount + 16),
+                   LEIntField("NegotiateContextOffset/Reserved2", 0)]
+
+
+bind_layers(NBTSession, SMB2_SYNC_Header,)
+bind_layers(SMB2_SYNC_Header, SMB2_Negotiate_Protocol_Response,)
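
With the bind_layers calls in place, scapy stacks these layers automatically below an NBTSession; a construction sketch (the DialectRevision value is an arbitrary example):

    from scapy.layers.netbios import NBTSession
    from ID2TLib.OtherGroupLib.SMB2 import SMB2_SYNC_Header, SMB2_Negotiate_Protocol_Response

    pkt = NBTSession() / SMB2_SYNC_Header() / SMB2_Negotiate_Protocol_Response(DialectRevision=0x0302)
    pkt.show()         # dump all field values for inspection
    raw = bytes(pkt)   # serialize, e.g. for injection into a generated pcap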

+ 108 - 0
code/ID2TLib/OtherGroupLib/SMBLib.py

@@ -0,0 +1,108 @@
+from os import urandom
+from binascii import b2a_hex
+import random
+
+from ID2TLib.Utility import check_platform, get_filetime_format, get_rnd_boot_time
+
+# SMB port
+smb_port = 445
+
+# SMB versions
+smb_versions = {"1", "2.0", "2.1", "3.0", "3.0.2", "3.1.1"}
+smb_versions_per_win = {'win7': "2.1", 'win10': "3.1.1", 'winxp': "1", 'win8.1': "3.0.2", 'win8': "3.0",
+                        'winvista': "2.0", 'winnt': "1", "win2000": "1"}
+smb_versions_per_samba = {'3.6': "2.0", '4.0': "2.1", '4.1': "3.0", '4.3': "3.1.1"}
+# SMB dialects
+smb_dialects = ["PC NETWORK PROGRAM 1.0", "LANMAN1.0", "Windows for Workgroups 3.1a", "LM1.2X002", "LANMAN2.1",
+                "NT LM 0.12", "SMB 2.002", "SMB 2.???"]
+# SMB security blobs
+security_blob_windows = "\x60\x82\x01\x3c\x06\x06\x2b\x06\x01\x05\x05\x02\xa0\x82\x01\x30" \
+                        "\x30\x82\x01\x2c\xa0\x1a\x30\x18\x06\x0a\x2b\x06\x01\x04\x01\x82" \
+                        "\x37\x02\x02\x1e\x06\x0a\x2b\x06\x01\x04\x01\x82\x37\x02\x02\x0a" \
+                        "\xa2\x82\x01\x0c\x04\x82\x01\x08\x4e\x45\x47\x4f\x45\x58\x54\x53" \
+                        "\x01\x00\x00\x00\x00\x00\x00\x00\x60\x00\x00\x00\x70\x00\x00\x00" \
+                        "\xbc\x84\x03\x97\x6f\x80\x3b\x81\xa6\x45\x1b\x05\x92\x39\xde\x3d" \
+                        "\xd6\x91\x85\x49\x8a\xd0\x3b\x58\x87\x99\xb4\x98\xdf\xa6\x1d\x73" \
+                        "\x3b\x57\xbf\x05\x63\x5e\x30\xea\xa8\xd8\xd8\x45\xba\x80\x52\xa5" \
+                        "\x00\x00\x00\x00\x00\x00\x00\x00\x60\x00\x00\x00\x01\x00\x00\x00" \
+                        "\x00\x00\x00\x00\x00\x00\x00\x00\x5c\x33\x53\x0d\xea\xf9\x0d\x4d" \
+                        "\xb2\xec\x4a\xe3\x78\x6e\xc3\x08\x4e\x45\x47\x4f\x45\x58\x54\x53" \
+                        "\x03\x00\x00\x00\x01\x00\x00\x00\x40\x00\x00\x00\x98\x00\x00\x00" \
+                        "\xbc\x84\x03\x97\x6f\x80\x3b\x81\xa6\x45\x1b\x05\x92\x39\xde\x3d" \
+                        "\x5c\x33\x53\x0d\xea\xf9\x0d\x4d\xb2\xec\x4a\xe3\x78\x6e\xc3\x08" \
+                        "\x40\x00\x00\x00\x58\x00\x00\x00\x30\x56\xa0\x54\x30\x52\x30\x27" \
+                        "\x80\x25\x30\x23\x31\x21\x30\x1f\x06\x03\x55\x04\x03\x13\x18\x54" \
+                        "\x6f\x6b\x65\x6e\x20\x53\x69\x67\x6e\x69\x6e\x67\x20\x50\x75\x62" \
+                        "\x6c\x69\x63\x20\x4b\x65\x79\x30\x27\x80\x25\x30\x23\x31\x21\x30" \
+                        "\x1f\x06\x03\x55\x04\x03\x13\x18\x54\x6f\x6b\x65\x6e\x20\x53\x69" \
+                        "\x67\x6e\x69\x6e\x67\x20\x50\x75\x62\x6c\x69\x63\x20\x4b\x65\x79"
+security_blob_ubuntu = "\x60\x48\x06\x06\x2b\x06\x01\x05\x05\x02\xa0\x3e\x30\x3c\xa0\x0e" \
+                       "\x30\x0c\x06\x0a\x2b\x06\x01\x04\x01\x82\x37\x02\x02\x0a\xa3\x2a" \
+                       "\x30\x28\xa0\x26\x1b\x24\x6e\x6f\x74\x5f\x64\x65\x66\x69\x6e\x65" \
+                       "\x64\x5f\x69\x6e\x5f\x52\x46\x43\x34\x31\x37\x38\x40\x70\x6c\x65" \
+                       "\x61\x73\x65\x5f\x69\x67\x6e\x6f\x72\x65"
+security_blob_macos = "\x60\x7e\x06\x06\x2b\x06\x01\x05\x05\x02\xa0\x74\x30\x72\xa0\x44" \
+                      "\x30\x42\x06\x09\x2a\x86\x48\x82\xf7\x12\x01\x02\x02\x06\x09\x2a" \
+                      "\x86\x48\x86\xf7\x12\x01\x02\x02\x06\x06\x2a\x85\x70\x2b\x0e\x03" \
+                      "\x06\x06\x2b\x06\x01\x05\x05\x0e\x06\x0a\x2b\x06\x01\x04\x01\x82" \
+                      "\x37\x02\x02\x0a\x06\x06\x2b\x05\x01\x05\x02\x07\x06\x06\x2b\x06" \
+                      "\x01\x05\x02\x05\xa3\x2a\x30\x28\xa0\x26\x1b\x24\x6e\x6f\x74\x5f" \
+                      "\x64\x65\x66\x69\x6e\x65\x64\x5f\x69\x6e\x5f\x52\x46\x43\x34\x31" \
+                      "\x37\x38\x40\x70\x6c\x65\x61\x73\x65\x5f\x69\x67\x6e\x6f\x72\x65"
+
+
+def get_smb_version(platform: str):
+    """
+    Returns SMB version based on given platform
+
+    :param platform: the platform as string
+    :return: SMB version as string
+    """
+    check_platform(platform)
+    if platform == "linux":
+        return random.choice(list(smb_versions_per_samba.values()))
+    elif platform == "macos":
+        return "2.1"
+    else:
+        return smb_versions_per_win[platform]
+
+
+def get_smb_platform_data(platform: str, timestamp: float):
+    """
+    Gets platform-dependent data for SMB 2 packets
+
+    :param platform: the platform for which to get SMB 2 packet data
+    :param timestamp: a timestamp for calculating the boot-time
+    :return: server_guid, security_blob, capabilities, data_size and server_start_time of the given platform
+    """
+    check_platform(platform)
+    if platform == "linux":
+        server_guid = "ubuntu"
+        security_blob = security_blob_ubuntu
+        capabilities = 0x5
+        data_size = 0x800000
+        server_start_time = 0
+    elif platform == "macos":
+        server_guid = b2a_hex(urandom(15)).decode()
+        security_blob = security_blob_macos
+        capabilities = 0x6
+        data_size = 0x400000
+        server_start_time = 0
+    else:
+        server_guid = b2a_hex(urandom(15)).decode()
+        security_blob = security_blob_windows
+        capabilities = 0x7
+        data_size = 0x100000
+        server_start_time = get_filetime_format(get_rnd_boot_time(timestamp))
+    return server_guid, security_blob, capabilities, data_size, server_start_time
+
+
+def invalid_smb_version(version: str):
+    """
+    Prints an error and exits
+
+    :param version: the invalid SMB version
+    """
+    print("\nInvalid SMB version: " + version +
+          "\nPlease select one of the following versions: ", smb_versions)
+    exit(1)
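
A usage sketch; the platform strings follow the dictionary keys above, and check_platform (from ID2TLib.Utility) is expected to reject anything else:

    from ID2TLib.OtherGroupLib.SMBLib import get_smb_version, get_smb_platform_data, smb_versions

    version = get_smb_version("win7")  # -> "2.1" via smb_versions_per_win
    guid, blob, caps, size, boot_time = get_smb_platform_data("linux", 1500000000.0)
    assert version in smb_versions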

+ 963 - 0
code/ID2TLib/OtherGroupLib/Statistics.py

@@ -0,0 +1,963 @@
+from operator import itemgetter
+from math import sqrt, ceil, log
+
+import os
+import time
+import ID2TLib.libpcapreader as pr
+import matplotlib
+
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from ID2TLib.PcapFile import PcapFile
+from ID2TLib.StatsDatabase import StatsDatabase
+
+
+class Statistics:
+    def __init__(self, pcap_file: PcapFile):
+        """
+        Creates a new Statistics object.
+
+        :param pcap_file: A reference to the PcapFile object
+        """
+        # Fields
+        self.pcap_filepath = pcap_file.pcap_file_path
+        self.pcap_proc = None
+        self.do_extra_tests = False
+
+        # Create folder for statistics database if required
+        self.path_db = pcap_file.get_db_path()
+        path_dir = os.path.dirname(self.path_db)
+        if not os.path.isdir(path_dir):
+            os.makedirs(path_dir)
+
+        # Class instances
+        self.stats_db = StatsDatabase(self.path_db)
+
+    def load_pcap_statistics(self, flag_write_file: bool, flag_recalculate_stats: bool, flag_print_statistics: bool):
+        """
+        Loads the PCAP statistics for the file specified by pcap_filepath. If the database is not existing yet, the
+        statistics are calculated by the PCAP file processor and saved into the newly created database. Otherwise the
+        statistics are gathered directly from the existing database.
+
+        :param flag_write_file: Indicates whether the statistics should additionally be written into a text file (True)
+        or not (False)
+        :param flag_recalculate_stats: Indicates whether eventually existing statistics should be recalculated
+        :param flag_print_statistics: Indicates whether the gathered basic statistics should be printed to the terminal
+        """
+        # Load pcap and get loading time
+        time_start = time.clock()
+
+        # Inform user about recalculation of statistics and its reason
+        if flag_recalculate_stats:
+            print("Flag -r/--recalculate found. Recalculating statistics.")
+
+        # Recalculate statistics if database does not exist OR param -r/--recalculate is provided
+        if (not self.stats_db.get_db_exists()) or flag_recalculate_stats:
+            self.pcap_proc = pr.pcap_processor(self.pcap_filepath, str(self.do_extra_tests))
+            self.pcap_proc.collect_statistics()
+            self.pcap_proc.write_to_database(self.path_db)
+            outstring_datasource = "by PCAP file processor."
+        else:
+            outstring_datasource = "from statistics database."
+
+        # Load statistics from database
+        self.file_info = self.stats_db.get_file_info()
+
+        time_end = time.clock()
+        print("Loaded file statistics in " + str(time_end - time_start)[:4] + " sec " + outstring_datasource)
+
+        # Write statistics if param -e/--export provided
+        if flag_write_file:
+            self.write_statistics_to_file()
+
+        # Print statistics if param -s/--statistics provided
+        if flag_print_statistics:
+            self.print_statistics()
+
+    def get_file_information(self):
+        """
+        Returns a list of tuples, each containing one piece of information about the file.
+
+        :return: a list of tuples, each consisting of (description, value, unit), where unit is optional.
+        """
+        return [("Pcap file", self.pcap_filepath),
+                ("Packets", self.get_packet_count(), "packets"),
+                ("Capture length", self.get_capture_duration(), "seconds"),
+                ("Capture start", self.get_pcap_timestamp_start()),
+                ("Capture end", self.get_pcap_timestamp_end())]
+
+    def get_general_file_statistics(self):
+        """
+        Returns a list of tuples, each containing a file statistic.
+
+        :return: a list of tuples, each consisting of (description, value, unit).
+        """
+        return [("Avg. packet rate", self.file_info['avgPacketRate'], "packets/sec"),
+                ("Avg. packet size", self.file_info['avgPacketSize'], "kbytes"),
+                ("Avg. packets sent", self.file_info['avgPacketsSentPerHost'], "packets"),
+                ("Avg. bandwidth in", self.file_info['avgBandwidthIn'], "kbit/s"),
+                ("Avg. bandwidth out", self.file_info['avgBandwidthOut'], "kbit/s")]
+
+    @staticmethod
+    def write_list(desc_val_unit_list, func, line_ending="\n"):
+        """
+        Takes a list of tuples (statistic name, statistic value, unit) as input, generates a string of these three values
+        and applies the function func on this string.
+
+        Before generating the string, it identifies text containing a float number, casts the string to a
+        float and rounds the value to two decimal digits.
+
+        :param desc_val_unit_list: The list of tuples consisting of (description, value, unit)
+        :param func: The function to be applied to each generated string
+        :param line_ending: The formatting string to be applied at the end of each string
+        """
+        for entry in desc_val_unit_list:
+            # Convert text containing float into float
+            (description, value) = entry[0:2]
+            if isinstance(value, str) and "." in value:
+                try:
+                    value = float(value)
+                except ValueError:
+                    pass  # do nothing -> value was not a float
+            # round float
+            if isinstance(value, float):
+                value = round(value, 4)
+            # write into file
+            if len(entry) == 3:
+                unit = entry[2]
+                func(description + ":\t" + str(value) + " " + unit + line_ending)
+            else:
+                func(description + ":\t" + str(value) + line_ending)
+
+    def print_statistics(self):
+        """
+        Prints the basic file statistics to the terminal.
+        """
+        print("\nPCAP FILE INFORMATION ------------------------------")
+        Statistics.write_list(self.get_file_information(), print, "")
+        print("\nGENERAL FILE STATISTICS ----------------------------")
+        Statistics.write_list(self.get_general_file_statistics(), print, "")
+        print("\n")
+
+
+    def calculate_entropy(self, frequency: list, normalized: bool = False):
+        """
+        Calculates the entropy, and optionally the normalized entropy, of a list of elements given their frequencies.
+
+        :param frequency: The frequencies of the elements.
+        :param normalized: Whether to additionally calculate the normalized entropy.
+        :return: entropy or (entropy, normalized entropy)
+        """
+        entropy, normalizedEnt, n = 0, 0, 0
+        sumFreq = sum(frequency)
+        for x in frequency:
+            p_x = x / sumFreq
+            if p_x > 0:
+                n += 1
+                entropy += - p_x * log(p_x, 2)
+        if normalized:
+            if log(n) > 0:
+                normalizedEnt = entropy / log(n, 2)
+            return entropy, normalizedEnt
+        else:
+            return entropy
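    # Quick worked check of calculate_entropy (a sketch, assuming the module imports cleanly):
    # frequencies [1, 1, 2] give p = (0.25, 0.25, 0.5), so the entropy is
    # -(2 * 0.25 * log2(0.25) + 0.5 * log2(0.5)) = 0.5 + 0.5 + 0.5 = 1.5 bits,
    # and the normalized entropy divides by log2(3) ~ 1.585, giving ~ 0.946.
    from math import log
    from ID2TLib.OtherGroupLib.Statistics import Statistics

    s = Statistics.__new__(Statistics)  # calculate_entropy does not touch instance state
    entropy, norm = s.calculate_entropy([1, 1, 2], normalized=True)
    assert abs(entropy - 1.5) < 1e-9 and abs(norm - 1.5 / log(3, 2)) < 1e-9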
+
+    def calculate_complement_packet_rates(self, pps):
+        """
+        Calculates the complement packet rates of the background traffic packet rates for each interval,
+        then normalizes them to the maximum boundary given by the input parameter pps.
+
+        :param pps: The maximum packet rate (packets per second) to normalize against.
+        :return: The normalized packet rates for each time interval.
+        """
+        result = self.process_db_query(
+            "SELECT lastPktTimestamp,pktsCount FROM interval_statistics ORDER BY lastPktTimestamp")
+        # print(result)
+        bg_interval_pps = []
+        complement_interval_pps = []
+        intervalsSum = 0
+        if result:
+            # Get the interval in seconds
+            for i, row in enumerate(result):
+                if i < len(result) - 1:
+                    intervalsSum += ceil((int(result[i + 1][0]) * 10 ** -6) - (int(row[0]) * 10 ** -6))
+            interval = intervalsSum / (len(result) - 1)
+            # Convert timestamp from micro to seconds, convert packet rate "per interval" to "per second"
+            for row in result:
+                bg_interval_pps.append((int(row[0]) * 10 ** -6, int(row[1] / interval)))
+            # Find max PPS
+            maxPPS = max(bg_interval_pps, key=itemgetter(1))[1]
+
+            for row in bg_interval_pps:
+                complement_interval_pps.append((row[0], int(pps * (maxPPS - row[1]) / maxPPS)))
+
+        return complement_interval_pps
+
+
+    def get_tests_statistics(self):
+        """
+        Writes the calculated basic defects tests statistics into a file.
+        """
+        # self.stats_db._process_user_defined_query output is a list of tuples; thus, we need [0][0] to access the data
+
+        def count_frequency(valuesList):
+            values, frequency = [], []
+            for x in valuesList:
+                if x in values:
+                    frequency[values.index(x)] += 1
+                else:
+                    values.append(x)
+                    frequency.append(1)
+            return values, frequency
+
+        ####### Payload Tests #######
+        sumPayloadCount = self.stats_db._process_user_defined_query("SELECT sum(payloadCount) FROM interval_statistics")
+        pktCount = self.stats_db._process_user_defined_query("SELECT packetCount FROM file_statistics")
+        if sumPayloadCount and pktCount:
+            payloadRatio = 0
+            if pktCount[0][0] != 0:
+                payloadRatio = float(sumPayloadCount[0][0] / pktCount[0][0] * 100)
+        else:
+            payloadRatio = -1
+
+        ####### TCP checksum Tests #######
+        incorrectChecksumCount = self.stats_db._process_user_defined_query("SELECT sum(incorrectTCPChecksumCount) FROM interval_statistics")
+        correctChecksumCount = self.stats_db._process_user_defined_query("SELECT avg(correctTCPChecksumCount) FROM interval_statistics")
+        if incorrectChecksumCount and correctChecksumCount:
+            incorrectChecksumRatio = 0
+            if (incorrectChecksumCount[0][0] + correctChecksumCount[0][0]) != 0:
+                incorrectChecksumRatio = float(incorrectChecksumCount[0][0] / (incorrectChecksumCount[0][0] + correctChecksumCount[0][0]) * 100)
+        else:
+            incorrectChecksumRatio = -1
+
+        ####### IP Src & Dst Tests #######
+        result = self.stats_db._process_user_defined_query("SELECT ipAddress,pktsSent,pktsReceived FROM ip_statistics")
+        data, srcFrequency, dstFrequency = [], [], []
+        if result:
+            for row in result:
+                srcFrequency.append(row[1])
+                dstFrequency.append(row[2])
+        ipSrcEntropy, ipSrcNormEntropy = self.calculate_entropy(srcFrequency, True)
+        ipDstEntropy, ipDstNormEntropy = self.calculate_entropy(dstFrequency, True)
+
+        newIPCount = self.stats_db._process_user_defined_query("SELECT newIPCount FROM interval_statistics")
+        ipNovelsPerInterval, ipNovelsPerIntervalFrequency = count_frequency(newIPCount)
+        ipNoveltyDistEntropy = self.calculate_entropy(ipNovelsPerIntervalFrequency)
+
+        ####### Ports Tests #######
+        port0Count = self.stats_db._process_user_defined_query("SELECT SUM(portCount) FROM ip_ports WHERE portNumber = 0")
+        if not port0Count[0][0]:
+            port0Count = 0
+        else:
+            port0Count = port0Count[0][0]
+        reservedPortCount = self.stats_db._process_user_defined_query(
+            "SELECT SUM(portCount) FROM ip_ports WHERE portNumber IN (100,114,1023,1024,49151,49152,65535)")  # could be extended
+        if not reservedPortCount[0][0]:
+            reservedPortCount = 0
+        else:
+            reservedPortCount = reservedPortCount[0][0]
+
+        ####### TTL Tests #######
+        result = self.stats_db._process_user_defined_query("SELECT ttlValue,SUM(ttlCount) FROM ip_ttl GROUP BY ttlValue")
+        data, frequency = [], []
+        for row in result:
+            frequency.append(row[1])
+        ttlEntropy, ttlNormEntropy = self.calculate_entropy(frequency, True)
+        newTTLCount = self.stats_db._process_user_defined_query("SELECT newTTLCount FROM interval_statistics")
+        ttlNovelsPerInterval, ttlNovelsPerIntervalFrequency = count_frequency(newTTLCount)
+        ttlNoveltyDistEntropy = self.calculate_entropy(ttlNovelsPerIntervalFrequency)
+
+        ####### Window Size Tests #######
+        result = self.stats_db._process_user_defined_query("SELECT winSize,SUM(winCount) FROM tcp_win GROUP BY winSize")
+        data, frequency = [], []
+        for row in result:
+            frequency.append(row[1])
+        winEntropy, winNormEntropy = self.calculate_entropy(frequency, True)
+        newWinSizeCount = self.stats_db._process_user_defined_query("SELECT newWinSizeCount FROM interval_statistics")
+        winNovelsPerInterval, winNovelsPerIntervalFrequency = count_frequency(newWinSizeCount)
+        winNoveltyDistEntropy = self.calculate_entropy(winNovelsPerIntervalFrequency)
+
+        ####### ToS Tests #######
+        result = self.stats_db._process_user_defined_query(
+            "SELECT tosValue,SUM(tosCount) FROM ip_tos GROUP BY tosValue")
+        data, frequency = [], []
+        for row in result:
+            frequency.append(row[1])
+        tosEntropy, tosNormEntropy = self.calculate_entropy(frequency, True)
+        newToSCount = self.stats_db._process_user_defined_query("SELECT newToSCount FROM interval_statistics")
+        tosNovelsPerInterval, tosNovelsPerIntervalFrequency = count_frequency(newToSCount)
+        tosNoveltyDistEntropy = self.calculate_entropy(tosNovelsPerIntervalFrequency)
+
+        ####### MSS Tests #######
+        result = self.stats_db._process_user_defined_query(
+            "SELECT mssValue,SUM(mssCount) FROM tcp_mss GROUP BY mssValue")
+        data, frequency = [], []
+        for row in result:
+            frequency.append(row[1])
+        mssEntropy, mssNormEntropy = self.calculate_entropy(frequency, True)
+        newMSSCount = self.stats_db._process_user_defined_query("SELECT newMSSCount FROM interval_statistics")
+        mssNovelsPerInterval, mssNovelsPerIntervalFrequency = count_frequency(newMSSCount)
+        mssNoveltyDistEntropy = self.calculate_entropy(mssNovelsPerIntervalFrequency)
+
+        result = self.stats_db._process_user_defined_query("SELECT SUM(mssCount) FROM tcp_mss WHERE mssValue > 1460")
+        # The most used MSS is < 1460. Calculate the ratio of the values bigger than 1460.
+        if not result[0][0]:
+            result = 0
+        else:
+            result = result[0][0]
+        bigMSS = (result / sum(frequency)) * 100
+
+        output = []
+        if self.do_extra_tests:
+            output = [("Payload ratio", payloadRatio, "%"),
+                ("Incorrect TCP checksum ratio", incorrectChecksumRatio, "%")]
+
+        output = output + [("# IP addresses", sum([x[0] for x in newIPCount]), ""),
+                ("IP Src Entropy", ipSrcEntropy, ""),
+                ("IP Src Normalized Entropy", ipSrcNormEntropy, ""),
+                ("IP Dst Entropy", ipDstEntropy, ""),
+                ("IP Dst Normalized Entropy", ipDstNormEntropy, ""),
+                ("IP Novelty Distribution Entropy", ipNoveltyDistEntropy, ""),
+                ("# TTL values", sum([x[0] for x in newTTLCount]), ""),
+                ("TTL Entropy", ttlEntropy, ""),
+                ("TTL Normalized Entropy", ttlNormEntropy, ""),
+                ("TTL Novelty Distribution Entropy", ttlNoveltyDistEntropy, ""),
+                ("# WinSize values", sum([x[0] for x in newWinSizeCount]), ""),
+                ("WinSize Entropy", winEntropy, ""),
+                ("WinSize Normalized Entropy", winNormEntropy, ""),
+                ("WinSize Novelty Distribution Entropy", winNoveltyDistEntropy, ""),
+                ("# ToS values",  sum([x[0] for x in newToSCount]), ""),
+                ("ToS Entropy", tosEntropy, ""),
+                ("ToS Normalized Entropy", tosNormEntropy, ""),
+                ("ToS Novelty Distribution Entropy", tosNoveltyDistEntropy, ""),
+                ("# MSS values", sum([x[0] for x in newMSSCount]), ""),
+                ("MSS Entropy", mssEntropy, ""),
+                ("MSS Normalized Entropy", mssNormEntropy, ""),
+                ("MSS Novelty Distribution Entropy", mssNoveltyDistEntropy, ""),
+                ("======================","","")]
+
+        # Reasoning the statistics values
+        if self.do_extra_tests:
+            if payloadRatio > 80:
+                output.append(("WARNING: Too high payload ratio", payloadRatio, "%."))
+            if payloadRatio < 30:
+                output.append(("WARNING: Too low payload ratio", payloadRatio, "% (Injecting attacks that are carried out in the packet payloads is not recommended)."))
+
+            if incorrectChecksumRatio > 5:
+                output.append(("WARNING: High incorrect TCP checksum ratio",incorrectChecksumRatio,"%."))
+
+        if ipSrcNormEntropy > 0.65:
+            output.append(("WARNING: High IP source normalized entropy",ipSrcNormEntropy,"."))
+        if ipSrcNormEntropy < 0.2:
+            output.append(("WARNING: Low IP source normalized entropy", ipSrcNormEntropy, "."))
+        if ipDstNormEntropy > 0.65:
+            output.append(("WARNING: High IP destination normalized entropy", ipDstNormEntropy, "."))
+        if ipDstNormEntropy < 0.2:
+            output.append(("WARNING: Low IP destination normalized entropy", ipDstNormEntropy, "."))
+
+        if ttlNormEntropy > 0.65:
+            output.append(("WARNING: High TTL normalized entropy", ttlNormEntropy, "."))
+        if ttlNormEntropy < 0.2:
+            output.append(("WARNING: Low TTL normalized entropy", ttlNormEntropy, "."))
+        if ttlNoveltyDistEntropy < 1:
+            output.append(("WARNING: Too low TTL novelty distribution entropy", ttlNoveltyDistEntropy,
+                           "(The distribution of the novel TTL values is suspicious)."))
+
+        if winNormEntropy > 0.6:
+            output.append(("WARNING: High Window Size normalized entropy", winNormEntropy, "."))
+        if winNormEntropy < 0.1:
+            output.append(("WARNING: Low Window Size normalized entropy", winNormEntropy, "."))
+        if winNoveltyDistEntropy < 4:
+            output.append(("WARNING: Low Window Size novelty distribution entropy", winNoveltyDistEntropy,
+                           "(The distribution of the novel Window Size values is suspicious)."))
+
+        if tosNormEntropy > 0.4:
+            output.append(("WARNING: High ToS normalized entropy", tosNormEntropy, "."))
+        if tosNormEntropy < 0.1:
+            output.append(("WARNING: Low ToS normalized entropy", tosNormEntropy, "."))
+        if tosNoveltyDistEntropy < 0.5:
+            output.append(("WARNING: Low ToS novelty distribution entropy", tosNoveltyDistEntropy,
+                           "(The distribution of the novel ToS values is suspicious)."))
+
+        if mssNormEntropy > 0.4:
+            output.append(("WARNING: High MSS normalized entropy", mssNormEntropy, "."))
+        if mssNormEntropy < 0.1:
+            output.append(("WARNING: Low MSS normalized entropy", mssNormEntropy, "."))
+        if mssNoveltyDistEntropy < 0.5:
+            output.append(("WARNING: Low MSS novelty distribution entropy", mssNoveltyDistEntropy,
+                           "(The distribution of the novel MSS values is suspicious)."))
+
+        if bigMSS > 50:
+            output.append(("WARNING: High ratio of MSS > 1460", bigMSS, "% (High fragmentation rate in Ethernet)."))
+
+        if port0Count > 0:
+            output.append(("WARNING: Port number 0 is used in", port0Count, "packets (awkward-looking port)."))
+        if reservedPortCount > 0:
+            output.append(("WARNING: Reserved port numbers are used in", reservedPortCount, "packets (uncommonly-used ports)."))
+
+        return output
+
+    def write_statistics_to_file(self):
+        """
+        Writes the calculated basic statistics into a file.
+        """
+
+        def _write_header(title: str):
+            """
+            Writes the section header into the open file.
+
+            :param title: The section title
+            """
+            target.write("====================== \n")
+            target.write(title + " \n")
+            target.write("====================== \n")
+
+        target = open(self.pcap_filepath + ".stat", 'w')  # mode 'w' truncates any existing file
+
+        _write_header("PCAP file information")
+        Statistics.write_list(self.get_file_information(), target.write)
+
+        _write_header("General statistics")
+        Statistics.write_list(self.get_general_file_statistics(), target.write)
+
+        _write_header("Tests statistics")
+        Statistics.write_list(self.get_tests_statistics(), target.write)
+
+        target.close()
+
+    def get_capture_duration(self):
+        """
+        :return: The duration of the capture in seconds
+        """
+        return self.file_info['captureDuration']
+
+    def get_pcap_timestamp_start(self):
+        """
+        :return: The timestamp of the first packet in the PCAP file
+        """
+        return self.file_info['timestampFirstPacket']
+
+    def get_pcap_timestamp_end(self):
+        """
+        :return: The timestamp of the last packet in the PCAP file
+        """
+        return self.file_info['timestampLastPacket']
+
+    def get_pps_sent(self, ip_address: str):
+        """
+        Calculates the packets per second sent by a given IP address.
+
+        :param ip_address: The IP address whose packet rate should be calculated
+        :return: The packets per second sent by the given IP address
+        """
+        packets_sent = self.stats_db.process_db_query("SELECT pktsSent from ip_statistics WHERE ipAddress=?", False,
+                                                      (ip_address,))
+        capture_duration = float(self.get_capture_duration())
+        return int(float(packets_sent) / capture_duration)
+
+    def get_pps_received(self, ip_address: str):
+        """
+        Calculates the packets per second received by a given IP address.
+
+        :param ip_address: The IP address used for the calculation
+        :return: The number of packets per second received
+        """
+        packets_received = self.stats_db.process_db_query("SELECT pktsReceived FROM ip_statistics WHERE ipAddress=?",
+                                                          False,
+                                                          (ip_address,))
+        capture_duration = float(self.get_capture_duration())
+        return int(float(packets_received) / capture_duration)
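+
+    # Usage sketch (illustrative only; assumes `stats` is an initialized Statistics
+    # instance and the address actually occurs in the statistics DB):
+    #   rate_out = stats.get_pps_sent("192.168.189.143")
+    #   rate_in = stats.get_pps_received("192.168.189.143")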
+
+    def get_packet_count(self):
+        """
+        :return: The number of packets in the loaded PCAP file
+        """
+        return self.file_info['packetCount']
+
+    def get_most_used_ip_address(self):
+        """
+        :return: The IP address/addresses with the highest sum of packets sent and received
+        """
+        return self.process_db_query("most_used(ipAddress)")
+
+    def get_ttl_distribution(self, ipAddress: str):
+        result = self.process_db_query('SELECT ttlValue, ttlCount from ip_ttl WHERE ipAddress="' + ipAddress + '"')
+        result_dict = {key: value for (key, value) in result}
+        return result_dict
+
+    def get_mss_distribution(self, ipAddress: str):
+        result = self.process_db_query('SELECT mssValue, mssCount from tcp_mss WHERE ipAddress="' + ipAddress + '"')
+        result_dict = {key: value for (key, value) in result}
+        return result_dict
+
+    def get_win_distribution(self, ipAddress: str):
+        result = self.process_db_query('SELECT winSize, winCount from tcp_win WHERE ipAddress="' + ipAddress + '"')
+        result_dict = {key: value for (key, value) in result}
+        return result_dict
+
+    def get_tos_distribution(self, ipAddress: str):
+        result = self.process_db_query('SELECT tosValue, tosCount from ip_tos WHERE ipAddress="' + ipAddress + '"')
+        result_dict = {key: value for (key, value) in result}
+        return result_dict
+
+    def get_ip_address_count(self):
+        return self.process_db_query("SELECT COUNT(*) FROM ip_statistics")
+
+    def get_ip_addresses(self):
+        return self.process_db_query("SELECT ipAddress FROM ip_statistics")
+
+    def get_random_ip_address(self, count: int = 1):
+        """
+        :param count: The number of IP addresses to return
+        :return: A randomly chosen IP address from the dataset or, if count is greater than one, a list of randomly
+         chosen IP addresses (duplicates are possible)
+        """
+        if count == 1:
+            return self.process_db_query("random(all(ipAddress))")
+        else:
+            ip_address_list = []
+            for i in range(0, count):
+                ip_address_list.append(self.process_db_query("random(all(ipAddress))"))
+            return ip_address_list
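+
+    # Illustrative calls (assuming `stats` is an initialized Statistics instance):
+    #   single_ip = stats.get_random_ip_address()        # one address as a string
+    #   ip_list = stats.get_random_ip_address(count=5)   # a list of 5 addresses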
+
+    def get_ip_address_from_mac(self, macAddress: str):
+        """
+        :param macAddress: the MAC address whose IP address shall be returned, if present in the DB
+        :return: the IP address used in the dataset by a given MAC address
+        """
+        return self.process_db_query('ipAddress(macAddress=' + macAddress + ")")
+
+    def get_mac_address(self, ipAddress: str):
+        """
+        :return: The MAC address used in the dataset for the given IP address.
+        """
+        return self.process_db_query('macAddress(ipAddress=' + ipAddress + ")")
+
+    def get_most_used_mss(self, ipAddress: str):
+        """
+        :param ipAddress: The IP address whose used MSS should be determined
+        :return: The TCP MSS value most used by the IP address, or None if the address never specified an MSS
+        """
+        mss_value = self.process_db_query('SELECT mssValue from tcp_mss WHERE ipAddress="' + ipAddress + '" ORDER BY mssCount DESC LIMIT 1')
+        if isinstance(mss_value, int):
+            return mss_value
+        else:
+            return None
+
+    def get_most_used_ttl(self, ipAddress: str):
+        """
+        :param ipAddress: The IP address whose used TTL should be determined
+        :return: The TTL value most used by the IP address, or None if the address never specified a TTL
+        """
+        ttl_value = self.process_db_query(
+            'SELECT ttlValue from ip_ttl WHERE ipAddress="' + ipAddress + '" ORDER BY ttlCount DESC LIMIT 1')
+        if isinstance(ttl_value, int):
+            return ttl_value
+        else:
+            return None
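+
+    # Illustrative calls (assuming `stats` is an initialized Statistics instance);
+    # both return None if the address never used the respective field:
+    #   mss = stats.get_most_used_mss("10.0.2.15")
+    #   ttl = stats.get_most_used_ttl("10.0.2.15")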
+
+
+    def get_statistics_database(self):
+        """
+        :return: A reference to the statistics database object
+        """
+        return self.stats_db
+
+    def process_db_query(self, query_string_in: str, print_results: bool = False):
+        """
+        Executes a string identified previously as a query. This can be a standard SQL SELECT/INSERT query or a named
+        query.
+
+        :param query_string_in: The query to be processed
+        :param print_results: Indicates whether the results should be printed to terminal
+        :return: The result of the query
+        """
+        return self.stats_db.process_db_query(query_string_in, print_results)
+
+    def is_query(self, value: str):
+        """
+        Checks whether the given string is a standard SQL query (SELECT, INSERT) or a named query.
+
+        :param value: The string to be checked
+        :return: True if the string is recognized as a query, otherwise False.
+        """
+        if not isinstance(value, str):
+            return False
+        else:
+            return (any(x in value.lower().strip() for x in self.stats_db.get_all_named_query_keywords()) or
+                    any(x in value.lower().strip() for x in self.stats_db.get_all_sql_query_keywords()))
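+
+    # Illustrative behaviour (assuming "select" is among the registered SQL keywords):
+    #   stats.is_query("SELECT ipAddress FROM ip_statistics")  # -> True
+    #   stats.is_query(42)                                     # -> False (not a string)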
+
+
+    def calculate_standard_deviation(self, lst):
+        """
+        Calculates the (population) standard deviation of a list of numbers.
+
+        :param lst: The list of numbers whose standard deviation is calculated
+        :return: The standard deviation of the list
+        """
+        num_items = len(lst)
+        mean = sum(lst) / num_items
+        differences = [x - mean for x in lst]
+        sq_differences = [d ** 2 for d in differences]
+        ssd = sum(sq_differences)
+        variance = ssd / num_items
+        sd = sqrt(variance)
+        return sd
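+
+    # Worked example (population standard deviation):
+    #   stats.calculate_standard_deviation([2, 4, 4, 4, 5, 5, 7, 9])  # -> 2.0
+    #   (mean = 5, squared deviations sum to 32, variance = 32/8 = 4, SD = 2.0)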
+
+
+    def plot_statistics(self, format: str = 'pdf'):  # alternatively e.g. 'png'
+        """
+        Plots the statistics associated with the dataset.
+        :param format: The format to be used to save the statistics diagrams.
+        """
+
+        def plot_distribution(queryOutput, title,  xLabel, yLabel, file_ending: str):
+            plt.gcf().clear()
+            graphx, graphy = [], []
+            for row in queryOutput:
+                graphx.append(row[0])
+                graphy.append(row[1])
+            plt.autoscale(enable=True, axis='both')
+            plt.title(title)
+            plt.xlabel(xLabel)
+            plt.ylabel(yLabel)
+            width = 0.1
+            plt.xlim([0, max(graphx)])
+            plt.grid(True)
+            plt.bar(graphx, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
+            out = self.pcap_filepath.replace('.pcap', '_plot-' + title + file_ending)
+            plt.savefig(out, dpi=500)
+            return out
+
+        def plot_ttl(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT ttlValue, SUM(ttlCount) FROM ip_ttl GROUP BY ttlValue")
+            title = "TTL Distribution"
+            xLabel = "TTL Value"
+            yLabel = "Number of Packets"
+            if queryOutput:
+                return plot_distribution(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_mss(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT mssValue, SUM(mssCount) FROM tcp_mss GROUP BY mssValue")
+            title = "MSS Distribution"
+            xLabel = "MSS Value"
+            yLabel = "Number of Packets"
+            if queryOutput:
+                return plot_distribution(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_win(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT winSize, SUM(winCount) FROM tcp_win GROUP BY winSize")
+            title = "Window Size Distribution"
+            xLabel = "Window Size"
+            yLabel = "Number of Packets"
+            if queryOutput:
+                return plot_distribution(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_protocol(file_ending: str):
+            plt.gcf().clear()
+            result = self.stats_db._process_user_defined_query(
+                "SELECT protocolName, SUM(protocolCount) FROM ip_protocols GROUP BY protocolName")
+            if result:
+                graphx, graphy = [], []
+                for row in result:
+                    graphx.append(row[0])
+                    graphy.append(row[1])
+                plt.autoscale(enable=True, axis='both')
+                plt.title("Protocols Distribution")
+                plt.xlabel('Protocols')
+                plt.ylabel('Number of Packets')
+                width = 0.1
+                plt.xlim([0, len(graphx)])
+                plt.grid(True)
+
+                # Protocols' names on x-axis
+                x = range(0,len(graphx))
+                my_xticks = graphx
+                plt.xticks(x, my_xticks)
+
+                plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
+                out = self.pcap_filepath.replace('.pcap', '_plot-protocol' + file_ending)
+                plt.savefig(out, dpi=500)
+                return out
+            else:
+                print("Error plot protocol: No protocol values found!")
+
+        def plot_port(file_ending: str):
+            plt.gcf().clear()
+            result = self.stats_db._process_user_defined_query(
+                "SELECT portNumber, SUM(portCount) FROM ip_ports GROUP BY portNumber")
+            graphx, graphy = [], []
+            for row in result:
+                graphx.append(row[0])
+                graphy.append(row[1])
+            plt.autoscale(enable=True, axis='both')
+            plt.title("Ports Distribution")
+            plt.xlabel('Ports Numbers')
+            plt.ylabel('Number of Packets')
+            width = 0.1
+            plt.xlim([0, max(graphx)])
+            plt.grid(True)
+            plt.bar(graphx, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
+            out = self.pcap_filepath.replace('.pcap', '_plot-port' + file_ending)
+            plt.savefig(out, dpi=500)
+            return out
+
+        # This distribution is not drawable for big datasets
+        def plot_ip_src(file_ending: str):
+            plt.gcf().clear()
+            result = self.stats_db._process_user_defined_query(
+                "SELECT ipAddress, pktsSent FROM ip_statistics")
+            graphx, graphy = [], []
+            for row in result:
+                graphx.append(row[0])
+                graphy.append(row[1])
+            plt.autoscale(enable=True, axis='both')
+            plt.title("Source IP Distribution")
+            plt.xlabel('Source IP')
+            plt.ylabel('Number of Packets')
+            width = 0.1
+            plt.xlim([0, len(graphx)])
+            plt.grid(True)
+
+            # IPs on x-axis
+            x = range(0, len(graphx))
+            my_xticks = graphx
+            plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
+            plt.tight_layout()
+
+            # limit the number of xticks
+            plt.locator_params(axis='x', nbins=20)
+
+            plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
+            out = self.pcap_filepath.replace('.pcap', '_plot-ip-src' + file_ending)
+            plt.savefig(out, dpi=500)
+            return out
+
+        # This distribution is not drawable for big datasets
+        def plot_ip_dst(file_ending: str):
+            plt.gcf().clear()
+            result = self.stats_db._process_user_defined_query(
+                "SELECT ipAddress, pktsReceived FROM ip_statistics")
+            graphx, graphy = [], []
+            for row in result:
+                graphx.append(row[0])
+                graphy.append(row[1])
+            plt.autoscale(enable=True, axis='both')
+            plt.title("Destination IP Distribution")
+            plt.xlabel('Destination IP')
+            plt.ylabel('Number of Packets')
+            width = 0.1
+            plt.xlim([0, len(graphx)])
+            plt.grid(True)
+
+            # IPs on x-axis
+            x = range(0, len(graphx))
+            my_xticks = graphx
+            plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
+            plt.tight_layout()
+
+            # limit the number of xticks
+            plt.locator_params(axis='x', nbins=20)
+
+            plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
+            out = self.pcap_filepath.replace('.pcap', '_plot-ip-dst' + file_ending)
+            plt.savefig(out, dpi=500)
+            return out
+
+        def plot_interval_statistics(queryOutput, title,  xLabel, yLabel, file_ending: str):
+            plt.gcf().clear()
+            graphx, graphy = [], []
+            for row in queryOutput:
+                graphx.append(row[0])
+                graphy.append(row[1])
+            plt.autoscale(enable=True, axis='both')
+            plt.title(title)
+            plt.xlabel(xLabel)
+            plt.ylabel(yLabel)
+            width = 0.5
+            plt.xlim([0, len(graphx)])
+            plt.grid(True)
+
+            # timestamp on x-axis
+            x = range(0, len(graphx))
+
+            # limit the number of xticks
+            plt.locator_params(axis='x', nbins=20)
+
+            plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
+            out = self.pcap_filepath.replace('.pcap', '_plot-' + title + file_ending)
+            plt.savefig(out, dpi=500)
+            return out
+
+        def plot_interval_pktCount(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT lastPktTimestamp, pktsCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            title = "Packet Rate"
+            xLabel = "Time Interval"
+            yLabel = "Number of Packets"
+            if queryOutput:
+                return plot_interval_statistics(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_interval_ip_src_ent(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT lastPktTimestamp, ipSrcEntropy FROM interval_statistics ORDER BY lastPktTimestamp")
+            title = "Source IP Entropy"
+            xLabel = "Time Interval"
+            yLabel = "Entropy"
+            if queryOutput:
+                return plot_interval_statistics(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_interval_ip_dst_ent(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT lastPktTimestamp, ipDstEntropy FROM interval_statistics ORDER BY lastPktTimestamp")
+            title = "Destination IP Entropy"
+            xLabel = "Time Interval"
+            yLabel = "Entropy"
+            if queryOutput:
+                return plot_interval_statistics(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_interval_new_ip(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT lastPktTimestamp, newIPCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            title = "IP Novelty Distribution"
+            xLabel = "Time Interval"
+            yLabel = "Novel values count"
+            if queryOutput:
+                return plot_interval_statistics(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_interval_new_port(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT lastPktTimestamp, newPortCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            title = "Port Novelty Distribution"
+            xLabel = "Time Interval"
+            yLabel = "Novel values count"
+            if queryOutput:
+                return plot_interval_statistics(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_interval_new_ttl(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT lastPktTimestamp, newTTLCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            title = "TTL Novelty Distribution"
+            xLabel = "Time Interval"
+            yLabel = "Novel values count"
+            if queryOutput:
+                return plot_interval_statistics(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_interval_new_tos(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT lastPktTimestamp, newToSCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            title = "ToS Novelty Distribution"
+            xLabel = "Time Interval"
+            yLabel = "Novel values count"
+            if queryOutput:
+                return plot_interval_statistics(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_interval_new_win_size(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT lastPktTimestamp, newWinSizeCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            title = "Window Size Novelty Distribution"
+            xLabel = "Time Interval"
+            yLabel = "Novel values count"
+            if queryOutput:
+                return plot_interval_statistics(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_interval_new_mss(file_ending: str):
+            queryOutput = self.stats_db._process_user_defined_query(
+                "SELECT lastPktTimestamp, newMSSCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            title = "MSS Novelty Distribution"
+            xLabel = "Time Interval"
+            yLabel = "Novel values count"
+            if queryOutput:
+                return plot_interval_statistics(queryOutput, title, xLabel, yLabel, file_ending)
+
+        def plot_interval_ip_dst_cum_ent(file_ending: str):
+            plt.gcf().clear()
+            result = self.stats_db._process_user_defined_query(
+                "SELECT lastPktTimestamp, ipDstCumEntropy FROM interval_statistics ORDER BY lastPktTimestamp")
+            graphx, graphy = [], []
+            for row in result:
+                graphx.append(row[0])
+                graphy.append(row[1])
+            # If entropy was not calculated do not plot the graph
+            if graphy[0] != -1:
+                plt.autoscale(enable=True, axis='both')
+                plt.title("Destination IP Cumulative Entropy")
+                # plt.xlabel('Timestamp')
+                plt.xlabel('Time Interval')
+                plt.ylabel('Entropy')
+                plt.xlim([0, len(graphx)])
+                plt.grid(True)
+
+                # timestamp on x-axis
+                x = range(0, len(graphx))
+                # my_xticks = graphx
+                # plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
+                # plt.tight_layout()
+
+                # limit the number of xticks
+                plt.locator_params(axis='x', nbins=20)
+
+                plt.plot(x, graphy, 'r')
+                out = self.pcap_filepath.replace('.pcap', '_plot-interval-ip-dst-cum-ent' + file_ending)
+                plt.savefig(out, dpi=500)
+                return out
+
+        def plot_interval_ip_src_cum_ent(file_ending: str):
+            plt.gcf().clear()
+
+            result = self.stats_db._process_user_defined_query(
+                "SELECT lastPktTimestamp, ipSrcCumEntropy FROM interval_statistics ORDER BY lastPktTimestamp")
+            graphx, graphy = [], []
+            for row in result:
+                graphx.append(row[0])
+                graphy.append(row[1])
+            # If entropy was not calculated do not plot the graph
+            if graphy[0] != -1:
+                plt.autoscale(enable=True, axis='both')
+                plt.title("Source IP Cumulative Entropy")
+                # plt.xlabel('Timestamp')
+                plt.xlabel('Time Interval')
+                plt.ylabel('Entropy')
+                plt.xlim([0, len(graphx)])
+                plt.grid(True)
+
+                # timestamp on x-axis
+                x = range(0, len(graphx))
+                # my_xticks = graphx
+                # plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
+                # plt.tight_layout()
+
+                # limit the number of xticks
+                plt.locator_params(axis='x', nbins=20)
+
+                plt.plot(x, graphy, 'r')
+                out = self.pcap_filepath.replace('.pcap', '_plot-interval-ip-src-cum-ent' + file_ending)
+                plt.savefig(out, dpi=500)
+                return out
+
+        ttl_out_path = plot_ttl('.' + format)
+        mss_out_path = plot_mss('.' + format)
+        win_out_path = plot_win('.' + format)
+        protocol_out_path = plot_protocol('.' + format)
+        interval_pktCount_out_path = plot_interval_pktCount('.' + format)
+        interval_ip_src_ent_out_path = plot_interval_ip_src_ent('.' + format)
+        interval_ip_dst_ent_out_path = plot_interval_ip_dst_ent('.' + format)
+        interval_ip_src_cum_ent_out_path = plot_interval_ip_src_cum_ent('.' + format)
+        interval_ip_dst_cum_ent_out_path = plot_interval_ip_dst_cum_ent('.' + format)
+        interval_new_ip_out_path = plot_interval_new_ip('.' + format)
+        interval_new_port_out_path = plot_interval_new_port('.' + format)
+        interval_new_ttl_out_path = plot_interval_new_ttl('.' + format)
+        interval_new_tos_out_path = plot_interval_new_tos('.' + format)
+        interval_new_win_size_out_path = plot_interval_new_win_size('.' + format)
+        interval_new_mss_out_path = plot_interval_new_mss('.' + format)
+
+        ## Time consuming plot
+        # port_out_path = plot_port('.' + format)
+        ## Not drawable for too many IPs
+        # ip_src_out_path = plot_ip_src('.' + format)
+        # ip_dst_out_path = plot_ip_dst('.' + format)
+
+        print("Saved plots in the input PCAP directory.")

+ 269 - 0
code/ID2TLib/OtherGroupLib/Utility.py

@@ -0,0 +1,269 @@
+import ipaddress
+
+from random import randint, uniform
+from os import urandom
+from datetime import datetime
+from calendar import timegm
+from lea import Lea
+
+platforms = {"win7", "win10", "winxp", "win8.1", "macos", "linux", "win8", "winvista", "winnt", "win2000"}
+platform_probability = {"win7": 48.43, "win10": 27.99, "winxp": 6.07, "win8.1": 6.07, "macos": 5.94, "linux": 3.38,
+                        "win8": 1.35, "winvista": 0.46, "winnt": 0.31}
+
+x86_nops = {b'\x90', b'\xfc', b'\xfd', b'\xf8', b'\xf9', b'\xf5', b'\x9b'}
+x86_pseudo_nops = {b'\x97', b'\x96', b'\x95', b'\x93', b'\x92', b'\x91', b'\x99', b'\x4d', b'\x48', b'\x47', b'\x4f',
+                   b'\x40', b'\x41', b'\x37', b'\x3f', b'\x27', b'\x2f', b'\x46', b'\x4e', b'\x98', b'\x9f', b'\x4a',
+                   b'\x44', b'\x42', b'\x43', b'\x49', b'\x4b', b'\x45', b'\x4c', b'\x60', b'\x0e', b'\x1e', b'\x50',
+                   b'\x55', b'\x53', b'\x51', b'\x57', b'\x52', b'\x06', b'\x56', b'\x54', b'\x16', b'\x58', b'\x5d',
+                   b'\x5b', b'\x59', b'\x5f', b'\x5a', b'\x5e', b'\xd6'}
+forbidden_chars = [b'\x00', b'\x0a', b'\x0d']
+
+
+def update_timestamp(timestamp, pps, delay=0):
+    """
+    Calculates the next timestamp based on the packets-per-second rate (pps) and, for replies, a base delay.
+
+    :param timestamp: the timestamp of the preceding packet
+    :param pps: the packets-per-second rate to imitate
+    :param delay: the base reply delay in seconds; 0 means a request timestamp is generated
+    :return: Timestamp to be used for the next packet.
+    """
+    if delay == 0:
+        # Calculate request timestamp
+        # To imitate the bursty behavior of traffic
+        randomdelay = Lea.fromValFreqsDict({1 / pps: 70, 2 / pps: 20, 5 / pps: 7, 10 / pps: 3})
+        return timestamp + uniform(1 / pps, randomdelay.random())
+    else:
+        # Calculate reply timestamp
+        randomdelay = Lea.fromValFreqsDict({2 * delay: 70, 3 * delay: 20, 5 * delay: 7, 10 * delay: 3})
+        return timestamp + uniform(1 / pps + delay, 1 / pps + randomdelay.random())
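+
+# Worked example: with pps = 10 and delay = 0, the distribution above draws a base
+# delay of 1/pps = 0.1 s with probability 70 % (0.2 s: 20 %, 0.5 s: 7 %, 1.0 s: 3 %),
+# and the result is timestamp + uniform(0.1, <drawn delay>).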
+
+
+def get_interval_pps(complement_interval_pps, timestamp):
+    """
+    Gets the packet rate (pps) for a specific time interval.
+
+    :param complement_interval_pps: an array of tuples (the last timestamp in the interval, the packet rate in the
+    corresponding interval).
+    :param timestamp: the timestamp at which the packet rate is required.
+    :return: the corresponding packet rate (pps).
+    """
+    for row in complement_interval_pps:
+        if timestamp <= row[0]:
+            return row[1]
+    return complement_interval_pps[-1][1]  # in case the timestamp exceeds the capture's max timestamp
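+
+# Illustrative call, assuming interval rates of 100 pps up to t=10 and 50 pps up to t=20:
+#   rates = [(10.0, 100), (20.0, 50)]
+#   get_interval_pps(rates, 15.3)  # -> 50
+#   get_interval_pps(rates, 25.0)  # -> 50 (falls back to the last interval)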
+
+
+def get_nth_random_element(*element_list):
+    """
+    Returns the n-th element of every list from an arbitrary number of given lists.
+    For example, list1 contains IP addresses and list2 contains MAC addresses. Using this function ensures that
+    the n-th IP address is always paired with the n-th MAC address.
+    :param element_list: An arbitrary number of lists.
+    :return: A tuple of the n-th element of every list.
+    """
+    range_max = min([len(x) for x in element_list])
+    if range_max > 0: range_max -= 1
+    n = randint(0, range_max)
+    return tuple(x[n] for x in element_list)
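+
+# Illustrative call: with ips = ["10.0.0.1", "10.0.0.2"] and macs = ["aa:aa:aa:aa:aa:aa",
+# "bb:bb:bb:bb:bb:bb"], get_nth_random_element(ips, macs) returns either
+# ("10.0.0.1", "aa:aa:aa:aa:aa:aa") or ("10.0.0.2", "bb:bb:bb:bb:bb:bb"),
+# i.e. the pairing between the lists is preserved.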
+
+
+def index_increment(number: int, max: int):
+    """
+    Increments an index, wrapping around to 0 once max is reached.
+    """
+    if number + 1 < max:
+        return number + 1
+    else:
+        return 0
+
+
+def get_rnd_os():
+    """
+    Chooses a random platform according to the operating-system probability distribution above
+
+    :return: random platform as string
+    """
+    os_dist = Lea.fromValFreqsDict(platform_probability)
+    return os_dist.random()
+
+
+def check_platform(platform: str):
+    """
+    Checks whether the given platform is currently supported; exits with an error if not.
+
+    :param platform: the platform, which should be validated
+    """
+    if platform not in platforms:
+        print("\nERROR: Invalid platform: " + platform + "." +
+              "\n Please select one of the following platforms: ", platforms)
+        exit(1)
+
+
+def get_ip_range(start_ip: str, end_ip: str):
+    """
+    Generates a list of IPs in a given range. If start_ip is greater than end_ip, the reversed range is generated.
+
+    :param start_ip: the start_ip of the desired IP-range
+    :param end_ip:  the end_ip of the desired IP-range
+    :return: a list of all IPs in the desired IP-range, including start-/end_ip
+    """
+    start = ipaddress.ip_address(start_ip)
+    end = ipaddress.ip_address(end_ip)
+    ips = []
+    if start < end:
+        while start <= end:
+            ips.append(start.exploded)
+            start = start+1
+    elif start > end:
+        while start >= end:
+            ips.append(start.exploded)
+            start = start-1
+    else:
+        ips.append(start_ip)
+    return ips
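+
+# Illustrative calls:
+#   get_ip_range("192.168.0.1", "192.168.0.3")  # -> ['192.168.0.1', '192.168.0.2', '192.168.0.3']
+#   get_ip_range("192.168.0.3", "192.168.0.1")  # -> ['192.168.0.3', '192.168.0.2', '192.168.0.1']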
+
+
+def generate_source_port_from_platform(platform: str, previousPort=0):
+    """
+    Generates the next source port according to the TCP-port-selection strategy of the given platform
+
+    :param platform: the platform for which to generate source ports
+    :param previousPort: the previously used/generated source port. Must be 0 if no port was generated before
+    :return: the next source port for the given platform
+    """
+    check_platform(platform)
+    if platform in {"winnt", "winxp", "win2000"}:
+        if (previousPort == 0) or (previousPort + 1 > 5000):
+            return randint(1024, 5000)
+        else:
+            return previousPort + 1
+    elif platform == "linux":
+        return randint(32768, 61000)
+    else:
+        if (previousPort == 0) or (previousPort + 1 > 65535):
+            return randint(49152, 65535)
+        else:
+            return previousPort + 1
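+
+# Illustrative behaviour: Windows NT/XP/2000 style ports count up sequentially,
+# wrapping back to a fresh random port once 5000 is exceeded:
+#   p1 = generate_source_port_from_platform("winxp")      # random in [1024, 5000]
+#   p2 = generate_source_port_from_platform("winxp", p1)  # p1 + 1, unless p1 == 5000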
+
+
+def get_filetime_format(timestamp):
+    """
+    Converts a timestamp into MS FILETIME format
+
+    :param timestamp: a timestamp in seconds
+    :return: MS FILETIME timestamp
+    """
+    # interpret the timestamp as UTC so that timegm() maps it back to the original epoch seconds
+    boot_datetime = datetime.utcfromtimestamp(timestamp)
+    boot_filetime = 116444736000000000 + (timegm(boot_datetime.timetuple()) * 10000000)
+    return boot_filetime + (boot_datetime.microsecond * 10)
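+
+# Worked example (with the UTC interpretation above): the Unix epoch maps to the
+# FILETIME epoch offset itself:
+#   get_filetime_format(0)  # -> 116444736000000000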
+
+
+def get_rnd_boot_time(timestamp, platform="winxp"):
+    """
+    Generates a random boot time based on a given timestamp and operating system
+
+    :param timestamp: a timestamp in seconds
+    :param platform: an optional platform string as validated by check_platform above (default: "winxp")
+    :return: timestamp of a random boot time in seconds since the epoch
+    """
+    check_platform(platform)
+    if platform is "linux":
+        uptime_in_days = Lea.fromValFreqsDict({3: 50, 7: 25, 14: 12.5, 31: 6.25, 92: 3.125, 183: 1.5625,
+                                               365: 0.78125, 1461: 0.390625, 2922: 0.390625})
+    elif platform is "macos":
+        uptime_in_days = Lea.fromValFreqsDict({7: 50, 14: 25, 31: 12.5, 92: 6.25, 183: 3.125, 365: 3.076171875,
+                                               1461: 0.048828125})
+    else:
+        uptime_in_days = Lea.fromValFreqsDict({3: 50, 7: 25, 14: 12.5, 31: 6.25, 92: 3.125, 183: 1.5625,
+                                               365: 0.78125, 1461: 0.78125})
+    timestamp -= randint(0, uptime_in_days.random()*86400)
+    return timestamp
+
+
+def get_rnd_x86_nop(count=1, side_effect_free=False, char_filter=set()):
+    """
+    Generates a specified number of x86 single-byte (pseudo-)NOPs
+
+    :param count: The number of bytes to generate
+    :param side_effect_free: Determines whether NOPs with side-effects (to registers or the stack) are allowed
+    :param char_filter: A set of bytes which are forbidden to generate
+    :return: Random x86 NOP bytestring
+    """
+    result = b''
+    nops = x86_nops
+    if not side_effect_free:
+        nops |= x86_pseudo_nops
+
+    if not isinstance(char_filter, set):
+        char_filter = set(char_filter)
+    nops = list(nops-char_filter)
+
+    for i in range(0, count):
+        result += nops[randint(0, len(nops) - 1)]
+    return result
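+
+# Illustrative call: 16 side-effect-free NOP bytes, excluding b'\x90':
+#   sled = get_rnd_x86_nop(16, side_effect_free=True, char_filter={b'\x90'})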
+
+
+def get_rnd_bytes(count=1, ignore=None):
+    """
+    Generates a specified number of random bytes while excluding unwanted bytes
+
+    :param count: Number of wanted bytes
+    :param ignore: The bytes, which should be ignored, as an array
+    :return: Random bytestring
+    """
+    if ignore is None:
+        ignore = []
+    result = b''
+    for i in range(0, count):
+        char = urandom(1)
+        while char in ignore:
+            char = urandom(1)
+        result += char
+    return result
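+
+# Illustrative call: 8 random bytes that avoid the globally forbidden characters:
+#   payload = get_rnd_bytes(8, ignore=forbidden_chars)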
+
+
+def get_bytes_from_file(filepath):
+    """
+    Converts the content of a file into its byte representation.
+    The content of the file can either be a string or hexadecimal numbers/bytes (e.g. shellcode).
+    The file must have the keyword "str" or "hex" in its first line to specify how the rest of the content is interpreted.
+    If the content is hex, whitespace, backslashes, "x", quotation marks and "+" are removed.
+    Example for a hexadecimal input file:
+
+        hex
+        "abcd ef \xff10\ff 'xaa' x \ ab"
+
+    Output: b'\xab\xcd\xef\xff\x10\xff\xaa\xab'
+
+    :param filepath: The path of the file from which to get the bytes
+    :return: The bytes of the file (either a byte representation of a string or the bytes contained in the file)
+    """
+    try:
+        file = open(filepath)
+        result_bytes = b''
+        header = file.readline().strip()
+        content = file.read()
+
+        if header == "hex":
+            content = content.replace(" ", "").replace("\n", "").replace("\\", "").replace("x", "").replace("\"", "")\
+                .replace("'", "").replace("+", "").replace("\r", "")
+            try:
+                result_bytes = bytes.fromhex(content)
+            except ValueError:
+                print("\nERROR: Content of file is not all hexadecimal.")
+                exit(1)
+        elif header == "str":
+            result_bytes = content.encode()
+        else:
+            print("\nERROR: Invalid header found: " + header + ". Try 'hex' or 'str' followed by endline instead.")
+            exit(1)
+
+        for forbidden_char in forbidden_chars:
+            if forbidden_char in result_bytes:
+                print("\nERROR: Forbidden character found in payload: ", forbidden_char)
+                exit(1)
+
+        file.close()
+        return result_bytes
+
+    except FileNotFoundError:
+        print("\nERROR: File not found: ", filepath)
+        exit(1)

+ 0 - 0
code/ID2TLib/OtherGroupLib/__init__.py


+ 53 - 0
code/ID2TLib/Statistics.py

@@ -618,6 +618,52 @@ class Statistics:
 
         return out_degree
 
+    def get_avg_delay_local_ext(self):
+        """
+        Calculates the average packet delay for local and for external communication, based on the TCP handshakes.
+
+        :return: tuple of the average delays for local and external communication: (local, external)
+        """
+
+        conv_delays = self.stats_db._process_user_defined_query("SELECT ipAddressA, ipAddressB, avgDelay FROM conv_statistics")
+        if conv_delays:
+            external_conv = []
+            local_conv = []
+
+            for conv in conv_delays:
+                IPA = IPAddress.parse(conv[0])
+                IPB = IPAddress.parse(conv[1])
+
+                # split into local and external conversations
+                if not IPA.is_private() or not IPB.is_private():
+                    external_conv.append(conv)
+                else:
+                    local_conv.append(conv)
+
+            # calculate the average local and external delays by summing the respective
+            # delays and dividing by the number of conversations
+            avg_delay_external = 0.0
+            avg_delay_local = 0.0
+
+            if local_conv:
+                for conv in local_conv:
+                    avg_delay_local += conv[2]
+                avg_delay_local = (avg_delay_local / len(local_conv)) * 0.001  # convert ms to s
+            else:
+                # no local conversations in statistics found
+                avg_delay_local = 0.06
+
+            if external_conv:
+                for conv in external_conv:
+                    avg_delay_external += conv[2]
+                avg_delay_external = (avg_delay_external / len(external_conv)) * 0.001  # convert ms to s
+            else:
+                # no external conversations in statistics found
+                avg_delay_external = 0.15
+        else:
+            # if no statistics were found at all, fall back to these defaults
+            avg_delay_external = 0.15
+            avg_delay_local = 0.06
+        return avg_delay_local, avg_delay_external
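+
+    # Usage sketch (illustrative; assumes `stats` is an initialized Statistics instance):
+    #   local_delay, external_delay = stats.get_avg_delay_local_ext()
+    # Both values are in seconds; 0.06 s (local) and 0.15 s (external) are the fallbacks
+    # used when no matching conversations are found in the statistics.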
+
     def filter_multiples(self, entries):
         """
         helper function, for get_out_degree and get_in_degree
@@ -1029,6 +1075,13 @@ class Statistics:
                 return out
 
         def plot_packets_per_connection(file_ending: str):
+            """
+            Plots the number of packets exchanged per connection as a horizontal bar plot.
+            Included are 'half-open' connections, where only one packet is exchanged.
+
+            :param file_ending: The file extension for the output file containing the plot
+            :return: A filepath to the file containing the created plot
+            """
             plt.gcf().clear()
             result = self.stats_db._process_user_defined_query(
                 "SELECT ipAddressA, portA, ipAddressB, portB, pktsCount FROM conv_statistics_stateless")