Browse Source

Revert "pasted other groups files into ours"

This reverts commit e4df2813a052a4cfc71ef7f46880cef39aecdb71.
Denis Waßmann 6 years ago
parent
commit
51d40e2988

+ 18 - 10
code/Attack/AttackParameters.py

@@ -10,8 +10,6 @@ class Parameter(Enum):
     IP_SOURCE = 'ip.src'  # source IP address
     IP_DESTINATION = 'ip.dst'  # destination IP address
     IP_DNS = 'ip.dns'  # IP address of DNS server
-    HOSTING_IP = 'hosting.ip'
-    IP_DESTINATION_END = 'ip.dst.end'
     # recommended type: MAC address ------------------------------
     MAC_SOURCE = 'mac.src'  # MAC address of source
     MAC_DESTINATION = 'mac.dst'  # MAC address of destination
@@ -25,6 +23,7 @@ class Parameter(Enum):
     ATTACK_DURATION = 'attack.duration' # in seconds
     VICTIM_BUFFER = 'victim.buffer' # in packets
     TARGET_URI = 'target.uri'
+    NUMBER_INITIATOR_BOTS = 'bots.count'
     # recommended type: domain -----------------------------------
     TARGET_HOST = 'target.host'
 
@@ -38,13 +37,19 @@ class Parameter(Enum):
     PORT_DEST_ORDER_DESC = 'port.dst.order-desc'  # uses a descending port order instead of a ascending order
     IP_SOURCE_RANDOMIZE = 'ip.src.shuffle'  # randomizes the sources IP address if a list of IP addresses is given
     PORT_SOURCE_RANDOMIZE = 'port.src.shuffle'  # randomizes the source port if a list of sources ports is given
-
-    PROTOCOL_VERSION = 'protocol.version'
-    HOSTING_VERSION = 'hosting.version'
-    SOURCE_PLATFORM = 'src.platform'
-    CUSTOM_PAYLOAD = 'custom.payload'  # custom payload for ftp exploits
-    CUSTOM_PAYLOAD_FILE = 'custom.payload.file'  # file that contains custom payload for ftp exploits
-
+    NAT_PRESENT = 'nat.present'  # if NAT is active, external computers cannot initiate a communication in MembersMgmtCommAttack
+    TTL_FROM_CAIDA = 'ttl.from.caida'  # if True, TTLs are assigned based on the TTL distributions from the CAIDA dataset
+    # recommended type: Filepath ------------------------------------
+    FILE_CSV = 'file.csv'  # filepath to CSV containing a communication pattern
+    FILE_XML = 'file.xml'  # filepath to XML containing a communication pattern
+    # recommended type: CommType ------------------------------------
+    COMM_TYPE = "comm.type"  # the locality of bots in botnet communication (e.g. local, external, mixed)
+    # recommended type: Percentage (0.0-1.0) ------------------------------------
+    IP_REUSE_TOTAL = 'ip.reuse.total'  # percentage of IPs in original PCAP to be reused
+    IP_REUSE_LOCAL = 'ip.reuse.local'  # percentage of private IPs in original PCAP to be reused
+    IP_REUSE_EXTERNAL = 'ip.reuse.external'  # percentage of public IPs in original PCAP to be reused
+    # recommended type: Integer between 0 and 100 (inclusive) ------------------------------------
+    PACKET_PADDING = 'packet.padding'
 
 class ParameterTypes(Enum):
     """
@@ -60,4 +65,7 @@ class ParameterTypes(Enum):
     TYPE_FLOAT = 6
     TYPE_PACKET_POSITION = 7  # used to derive timestamp from parameter INJECT_AFTER_PACKET
     TYPE_DOMAIN = 8
-    TYPE_STRING = 9
+    TYPE_FILEPATH = 9
+    TYPE_COMM_TYPE = 10
+    TYPE_PERCENTAGE = 11
+    TYPE_PADDING = 12

+ 92 - 9
code/Attack/BaseAttack.py

@@ -5,17 +5,16 @@ import os
 import random
 import re
 import tempfile
-import numpy as np
-
 from abc import abstractmethod, ABCMeta
 from scapy.layers.inet import Ether
+import numpy as np, math
+
+import ID2TLib.libpcapreader as pr
 from scapy.utils import PcapWriter
 
 from Attack import AttackParameters
 from Attack.AttackParameters import Parameter
 from Attack.AttackParameters import ParameterTypes
-import ID2TLib.libpcapreader as pr
-
 
 class BaseAttack(metaclass=ABCMeta):
     """
@@ -241,6 +240,50 @@ class BaseAttack(metaclass=ABCMeta):
         domain = re.match('^(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+$', val)
         return (domain is not None)
 
+    @staticmethod
+    def _is_filepath(val: str):
+        """
+        Verifies that the given string points to an existing file
+
+        :param val: The filepath as string
+        :return: True if the file at the given location exists, otherwise False
+        """
+        return os.path.isfile(val)
+
+    @staticmethod
+    def _is_comm_type(val: str):
+        """
+        Verifies that the given string is a valid communications type
+
+        :param val: the type of communication as a string
+        :return: True if the given type is a valid communications type, otherwise False
+        """
+        comm_types = {"local", "external", "mixed"}
+        return val in comm_types
+
+    @staticmethod
+    def _is_percentage(val: float):
+        """
+        Verifies that the given float value is a valid percentage, i.e. between 0 and 1
+
+        :param val: the float to test for validity
+        :return: True if the given value is a valid percentage, otherwise False
+        """
+        if val >= 0 and val <= 1:
+            return True
+        return False
+
+    @staticmethod
+    def _is_padding(val: int):
+        """
+        Verifies that the given int is a valid padding size, i.e. between 0 and 100
+
+        :param val: the padding to test for its size
+        :return: True if the given value is a valid padding size, False otherwise
+        """
+        if val >= 0 and val <= 100:
+            return True
+        return False
 
     #########################################
     # HELPER METHODS
@@ -301,9 +344,6 @@ class BaseAttack(metaclass=ABCMeta):
             elif isinstance(value, str) and value.isdigit() and int(value) >= 0:
                 is_valid = True
                 value = int(value)
-        elif param_type == ParameterTypes.TYPE_STRING:
-            if isinstance(value, str):
-                is_valid = True
         elif param_type == ParameterTypes.TYPE_FLOAT:
             is_valid, value = self._is_float(value)
             # this is required to avoid that the timestamp's microseconds of the first attack packet is '000000'
@@ -312,6 +352,15 @@ class BaseAttack(metaclass=ABCMeta):
             # e.g. inject.at-timestamp=123456 -> is changed to: 123456.[random digits]
             if param_name == Parameter.INJECT_AT_TIMESTAMP and is_valid and ((value - int(value)) == 0):
                 value = value + random.uniform(0, 0.999999)
+            # first packet of a pcap displays a timestamp of zero, but internally (usually) has a much larger one
+            # inject.at-timestamp has to be shifted by the value of the first packet of the input pcap
+            # otherwise new packets are always injected at the beginning and there is a large distance
+            # to the packets of the input pcap
+            if param_name == Parameter.INJECT_AT_TIMESTAMP and is_valid:
+                ts_first_pkt = pr.pcap_processor(self.statistics.pcap_filepath, "False").get_timestamp_mu_sec(1)
+                if ts_first_pkt >= 0:
+                    is_valid = True
+                    value = value + (ts_first_pkt / 1000000)  # convert microseconds from getTimestampMuSec into seconds
         elif param_type == ParameterTypes.TYPE_TIMESTAMP:
             is_valid = self._is_timestamp(value)
         elif param_type == ParameterTypes.TYPE_BOOLEAN:
@@ -324,6 +373,25 @@ class BaseAttack(metaclass=ABCMeta):
                 value = (ts / 1000000)  # convert microseconds from getTimestampMuSec into seconds
         elif param_type == ParameterTypes.TYPE_DOMAIN:
             is_valid = self._is_domain(value)
+        elif param_type == ParameterTypes.TYPE_FILEPATH:
+            is_valid = self._is_filepath(value)
+        elif param_type == ParameterTypes.TYPE_COMM_TYPE:
+            is_valid = self._is_comm_type(value)
+        elif param_type == ParameterTypes.TYPE_PERCENTAGE:
+            is_valid, value = self._is_float(value)
+            if is_valid and (param_name in {Parameter.IP_REUSE_TOTAL, Parameter.IP_REUSE_LOCAL, Parameter.IP_REUSE_EXTERNAL}):
+                is_valid = self._is_percentage(value)
+            else: 
+                is_valid = False
+        elif param_type == ParameterTypes.TYPE_PADDING:
+            if isinstance(value, int):
+                is_valid = True
+            elif isinstance(value, str) and value.isdigit():
+                is_valid = True
+                value = int(value)
+                
+            if is_valid:
+                is_valid = self._is_padding(value) 
 
         # add value iff validation was successful
         if is_valid:
@@ -384,10 +452,17 @@ class BaseAttack(metaclass=ABCMeta):
 
         return destination
 
-    def get_reply_delay(self, ip_dst):
+    def post_pcap_written(self, final_filename):
+        """
+        :param final_filename: The filename of the final pcap created
+        """
+        pass
+
+    def get_reply_delay(self, ip_dst, default = 2000):
         """
            Gets the minimum and the maximum reply delay for all the connections of a specific IP.
            :param ip_dst: The IP to reterive its reply delay.
+           :param default: The default delay (in microseconds) to use if no delay could be found. If < 0, a ValueError is raised instead
            :return minDelay: minimum delay
            :return maxDelay: maximum delay
 
@@ -402,6 +477,14 @@ class BaseAttack(metaclass=ABCMeta):
             minDelay = np.median(allMinDelays)
             allMaxDelays = self.statistics.process_db_query("SELECT maxDelay FROM conv_statistics LIMIT 500;")
             maxDelay = np.median(allMaxDelays)
+
+            if math.isnan(minDelay): # maxDelay is nan too then
+                if default < 0:
+                    raise ValueError("Could not calculate min/maxDelay")
+
+                minDelay = default
+                maxDelay = default
+
         minDelay = int(minDelay) * 10 ** -6  # convert from micro to seconds
         maxDelay = int(maxDelay) * 10 ** -6
         return minDelay, maxDelay
@@ -490,7 +573,7 @@ class BaseAttack(metaclass=ABCMeta):
         inter_arrival_times = []
         prvsPktTime = 0
         for index, pkt in enumerate(packets):
-            timestamp = pkt[2][0] + pkt[2][1]/10**6
+            timestamp = pkt[1][0] + pkt[1][1]/10**6
 
             if index == 0:
                 prvsPktTime = timestamp

+ 54 - 6
code/Attack/DDoSAttack.py

@@ -1,18 +1,17 @@
 import logging
+from random import randint, uniform, choice
 
-from random import randint, choice
 from lea import Lea
-from collections import deque
 from scipy.stats import gamma
-from scapy.layers.inet import IP, Ether, TCP, RandShort
 
 from Attack import BaseAttack
 from Attack.AttackParameters import Parameter as Param
 from Attack.AttackParameters import ParameterTypes
-from ID2TLib.Utility import update_timestamp, get_interval_pps, get_nth_random_element, index_increment
 
 logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
 # noinspection PyPep8
+from scapy.layers.inet import IP, Ether, TCP, RandShort
+from collections import deque
 
 
 class DDoSAttack(BaseAttack.BaseAttack):
@@ -72,7 +71,55 @@ class DDoSAttack(BaseAttack.BaseAttack):
         self.add_param_value(Param.MAC_DESTINATION, destination_mac)
         self.add_param_value(Param.VICTIM_BUFFER, randint(1000,10000))
 
-    def generate_attack_pcap(self):
+    def generate_attack_pcap(self, context):
+        def update_timestamp(timestamp, pps, delay=0):
+            """
+            Calculates the next timestamp to be used based on the packet per second rate (pps) and the maximum delay.
+
+            :return: Timestamp to be used for the next packet.
+            """
+            if delay == 0:
+                # Calculate the request timestamp
+                # A distribution to imitate the bursty behavior of traffic
+                randomdelay = Lea.fromValFreqsDict({1 / pps: 70, 2 / pps: 20, 5 / pps: 7, 10 / pps: 3})
+                return timestamp + uniform(1 / pps, randomdelay.random())
+            else:
+                # Calculate the reply timestamp
+                randomdelay = Lea.fromValFreqsDict({2 * delay: 70, 3 * delay: 20, 5 * delay: 7, 10 * delay: 3})
+                return timestamp + uniform(1 / pps + delay, 1 / pps + randomdelay.random())
+
+        def get_nth_random_element(*element_list):
+            """
+            Returns the n-th element of every list from an arbitrary number of given lists.
+            For example, list1 contains IP addresses, list 2 contains MAC addresses. Use of this function ensures that
+            the n-th IP address uses always the n-th MAC address.
+            :param element_list: An arbitrary number of lists.
+            :return: A tuple of the n-th element of every list.
+            """
+            range_max = min([len(x) for x in element_list])
+            if range_max > 0: range_max -= 1
+            n = randint(0, range_max)
+            return tuple(x[n] for x in element_list)
+
+        def index_increment(number: int, max: int):
+            if number + 1 < max:
+                return number + 1
+            else:
+                return 0
+
+        def getIntervalPPS(complement_interval_pps, timestamp):
+            """
+            Gets the packet rate (pps) for a specific time interval.
+            :param complement_interval_pps: an array of tuples (the last timestamp in the interval, the packet rate in the corresponding interval).
+            :param timestamp: the timestamp at which the packet rate is required.
+            :return: the corresponding packet rate (pps).
+            """
+            for row in complement_interval_pps:
+                if timestamp <= row[0]:
+                    return row[1]
+            # In case the timestamp > capture max timestamp
+            return complement_interval_pps[-1][1]
+
         def get_attacker_config(ipAddress: str):
             """
             Returns the attacker configuration depending on the IP address, this includes the port for the next
@@ -104,6 +151,7 @@ class DDoSAttack(BaseAttack.BaseAttack):
                 attacker_ttl_mapping[ipAddress] = ttl
             # return port and TTL
             return next_port, ttl
+
         BUFFER_SIZE = 1000
 
         # Determine source IP and MAC address
@@ -232,7 +280,7 @@ class DDoSAttack(BaseAttack.BaseAttack):
                     replies_count+=1
                     total_pkt_num += 1
 
-                attacker_pps = max(get_interval_pps(complement_interval_attacker_pps, timestamp_next_pkt), (pps / num_attackers) / 2)
+                attacker_pps = max(getIntervalPPS(complement_interval_attacker_pps, timestamp_next_pkt), (pps/num_attackers)/2)
                 timestamp_next_pkt = update_timestamp(timestamp_next_pkt, attacker_pps)
 
                 # Store timestamp of first packet (for attack label)

+ 35 - 16
code/Attack/EternalBlueExploit.py

@@ -1,23 +1,22 @@
 import logging
-
 from random import randint, uniform
+
 from lea import Lea
-from scapy.utils import RawPcapReader
-from scapy.layers.inet import Ether
 
 from Attack import BaseAttack
 from Attack.AttackParameters import Parameter as Param
 from Attack.AttackParameters import ParameterTypes
-from ID2TLib.Utility import update_timestamp, get_interval_pps
-from ID2TLib.SMBLib import smb_port
 
 logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
 # noinspection PyPep8
-
+from scapy.utils import RawPcapReader
+from scapy.layers.inet import IP, Ether, TCP, RandShort
 
 class EternalBlueExploit(BaseAttack.BaseAttack):
     template_scan_pcap_path = "resources/Win7_eternalblue_scan.pcap"
     template_attack_pcap_path = "resources/Win7_eternalblue_exploit.pcap"
+    # SMB port
+    smb_port = 445
     # Empirical values from Metasploit experiments
     minDefaultPort = 30000
     maxDefaultPort = 50000
@@ -70,7 +69,7 @@ class EternalBlueExploit(BaseAttack.BaseAttack):
         if isinstance(destination_mac, list) and len(destination_mac) == 0:
             destination_mac = self.generate_random_mac_address()
         self.add_param_value(Param.MAC_DESTINATION, destination_mac)
-        self.add_param_value(Param.PORT_DESTINATION, smb_port)
+        self.add_param_value(Param.PORT_DESTINATION, self.smb_port)
 
         # Attack configuration
         self.add_param_value(Param.PACKETS_PER_SECOND,
@@ -78,8 +77,28 @@ class EternalBlueExploit(BaseAttack.BaseAttack):
                               self.statistics.get_pps_received(most_used_ip_address)) / 2)
         self.add_param_value(Param.INJECT_AFTER_PACKET, randint(0, self.statistics.get_packet_count()))
 
-    def generate_attack_pcap(self):
-
+    def generate_attack_pcap(self, context):
+        def update_timestamp(timestamp, pps):
+            """
+            Calculates the next timestamp to be used based on the packet per second rate (pps) and the maximum delay.
+
+            :return: Timestamp to be used for the next packet.
+            """
+            # Calculate the request timestamp
+            # A distribution to imitate the bursty behavior of traffic
+            randomdelay = Lea.fromValFreqsDict({1 / pps: 70, 2 / pps: 20, 5 / pps: 7, 10 / pps: 3})
+            return timestamp + uniform(1 / pps, randomdelay.random())
+
+        def getIntervalPPS(complement_interval_pps, timestamp):
+            """
+            Gets the packet rate (pps) in a specific time interval.
+
+            :return: the corresponding packet rate (pps).
+            """
+            for row in complement_interval_pps:
+                if timestamp<=row[0]:
+                    return row[1]
+            return complement_interval_pps[-1][1] # in case the timestamp > capture max timestamp
 
         # Timestamp
         timestamp_next_pkt = self.get_param_value(Param.INJECT_AT_TIMESTAMP)
@@ -152,7 +171,7 @@ class EternalBlueExploit(BaseAttack.BaseAttack):
             tcp_pkt = ip_pkt.payload
 
             if pkt_num == 0:
-                if tcp_pkt.getfieldval("dport") == smb_port:
+                if tcp_pkt.getfieldval("dport") == self.smb_port:
                     orig_ip_dst = ip_pkt.getfieldval("dst") # victim IP
 
             # Request
@@ -183,7 +202,7 @@ class EternalBlueExploit(BaseAttack.BaseAttack):
                 new_pkt = (eth_frame / ip_pkt / tcp_pkt)
                 new_pkt.time = timestamp_next_pkt
 
-                pps = max(get_interval_pps(complement_interval_pps, timestamp_next_pkt), 10)
+                pps = max(getIntervalPPS(complement_interval_pps, timestamp_next_pkt), 10)
                 timestamp_next_pkt = update_timestamp(timestamp_next_pkt, pps) + inter_arrival_times[pkt_num]#float(timeSteps.random())
             # Reply
             else:
@@ -244,7 +263,7 @@ class EternalBlueExploit(BaseAttack.BaseAttack):
                     tcp_pkt = ip_pkt.payload
 
                     if pkt_num == 0:
-                        if tcp_pkt.getfieldval("dport") == smb_port:
+                        if tcp_pkt.getfieldval("dport") == self.smb_port:
                             orig_ip_dst = ip_pkt.getfieldval("dst")
 
                     # Request
@@ -275,7 +294,7 @@ class EternalBlueExploit(BaseAttack.BaseAttack):
                         new_pkt = (eth_frame / ip_pkt / tcp_pkt)
                         new_pkt.time = timestamp_next_pkt
 
-                        pps = max(get_interval_pps(complement_interval_pps, timestamp_next_pkt), 10)
+                        pps = max(getIntervalPPS(complement_interval_pps, timestamp_next_pkt), 10)
                         timestamp_next_pkt = update_timestamp(timestamp_next_pkt, pps) + inter_arrival_times[pkt_num] #float(timeSteps.random())
 
                     # Reply
@@ -305,7 +324,7 @@ class EternalBlueExploit(BaseAttack.BaseAttack):
 
                         new_pkt = (eth_frame / ip_pkt / tcp_pkt)
 
-                        pps = max(get_interval_pps(complement_interval_pps, timestamp_next_pkt), 10)
+                        pps = max(getIntervalPPS(complement_interval_pps, timestamp_next_pkt), 10)
                         timestamp_next_pkt = update_timestamp(timestamp_next_pkt, pps) + inter_arrival_times[pkt_num]#float(timeSteps.random())
 
                         new_pkt.time = timestamp_next_pkt
@@ -348,7 +367,7 @@ class EternalBlueExploit(BaseAttack.BaseAttack):
                         new_pkt = (eth_frame / ip_pkt / tcp_pkt)
                         new_pkt.time = timestamp_next_pkt
 
-                        pps = max(get_interval_pps(complement_interval_pps, timestamp_next_pkt), 10)
+                        pps = max(getIntervalPPS(complement_interval_pps, timestamp_next_pkt), 10)
                         timestamp_next_pkt = update_timestamp(timestamp_next_pkt, pps) + inter_arrival_times[pkt_num]# float(timeSteps.random())
 
                     # Reply
@@ -378,7 +397,7 @@ class EternalBlueExploit(BaseAttack.BaseAttack):
 
                         new_pkt = (eth_frame / ip_pkt / tcp_pkt)
 
-                        pps = max(get_interval_pps(complement_interval_pps, timestamp_next_pkt), 10)
+                        pps = max(getIntervalPPS(complement_interval_pps, timestamp_next_pkt), 10)
                         timestamp_next_pkt = update_timestamp(timestamp_next_pkt, pps) + inter_arrival_times[pkt_num]# float(timeSteps.random())
 
                         new_pkt.time = timestamp_next_pkt

+ 27 - 7
code/Attack/JoomlaRegPrivExploit.py

@@ -1,17 +1,16 @@
 import logging
+from random import randint, uniform
 
-from random import randint
 from lea import Lea
-from scapy.utils import RawPcapReader
-from scapy.layers.inet import Ether
 
 from Attack import BaseAttack
 from Attack.AttackParameters import Parameter as Param
 from Attack.AttackParameters import ParameterTypes
-from ID2TLib.Utility import update_timestamp, get_interval_pps
 
 logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
 # noinspection PyPep8
+from scapy.utils import RawPcapReader
+from scapy.layers.inet import IP, Ether, TCP, RandShort
 
 
 class JoomlaRegPrivExploit(BaseAttack.BaseAttack):
@@ -79,7 +78,28 @@ class JoomlaRegPrivExploit(BaseAttack.BaseAttack):
                              (self.statistics.get_pps_sent(most_used_ip_address) +
                               self.statistics.get_pps_received(most_used_ip_address)) / 2)
 
-    def generate_attack_pcap(self):
+    def generate_attack_pcap(self, context):
+        def update_timestamp(timestamp, pps):
+            """
+            Calculates the next timestamp to be used based on the packet per second rate (pps) and the maximum delay.
+
+            :return: Timestamp to be used for the next packet.
+            """
+            # Calculate the request timestamp
+            # A distribution to imitate the bursty behavior of traffic
+            randomdelay = Lea.fromValFreqsDict({1 / pps: 70, 2 / pps: 20, 5 / pps: 7, 10 / pps: 3})
+            return timestamp + uniform(1 / pps, randomdelay.random())
+
+        def getIntervalPPS(complement_interval_pps, timestamp):
+            """
+            Gets the packet rate (pps) in a specific time interval.
+
+            :return: the corresponding packet rate (pps).
+            """
+            for row in complement_interval_pps:
+                if timestamp <= row[0]:
+                    return row[1]
+            return complement_interval_pps[-1][1]  # in case the timestamp > capture max timestamp
 
         # Timestamp
         timestamp_next_pkt = self.get_param_value(Param.INJECT_AT_TIMESTAMP)
@@ -186,7 +206,7 @@ class JoomlaRegPrivExploit(BaseAttack.BaseAttack):
                 new_pkt = (eth_frame / ip_pkt/ tcp_pkt / str_tcp_seg)
                 new_pkt.time = timestamp_next_pkt
 
-                pps = max(get_interval_pps(complement_interval_pps, timestamp_next_pkt), 10)
+                pps = max(getIntervalPPS(complement_interval_pps, timestamp_next_pkt), 10)
                 timestamp_next_pkt = update_timestamp(timestamp_next_pkt, pps) + float(timeSteps.random())
 
             # Reply: Victim --> attacker
@@ -212,7 +232,7 @@ class JoomlaRegPrivExploit(BaseAttack.BaseAttack):
                     victim_seq += max(strLen, 1)
 
                 new_pkt = (eth_frame / ip_pkt / tcp_pkt / str_tcp_seg)
-                pps = max(get_interval_pps(complement_interval_pps, timestamp_next_pkt), 10)
+                pps = max(getIntervalPPS(complement_interval_pps, timestamp_next_pkt), 10)
                 timestamp_next_pkt = update_timestamp(timestamp_next_pkt, pps) + float(timeSteps.random())
                 new_pkt.time = timestamp_next_pkt
 

+ 35 - 11
code/Attack/PortscanAttack.py

@@ -1,18 +1,17 @@
 import logging
 import csv
 
-from random import shuffle, randint, choice
+from random import shuffle, randint, choice, uniform
+
 from lea import Lea
-from scapy.layers.inet import IP, Ether, TCP
 
 from Attack import BaseAttack
 from Attack.AttackParameters import Parameter as Param
 from Attack.AttackParameters import ParameterTypes
-from ID2TLib.Utility import update_timestamp, get_interval_pps
 
 logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
 # noinspection PyPep8
-
+from scapy.layers.inet import IP, Ether, TCP
 
 class PortscanAttack(BaseAttack.BaseAttack):
 
@@ -99,8 +98,8 @@ class PortscanAttack(BaseAttack.BaseAttack):
         if (ports_num == 1000):  # used for port.dst
             temp_array = [[0 for i in range(10)] for i in range(100)]
             port_dst_shuffled = []
-            for count in range(0, 10):
-                temp_array[count] = ports_dst[count * 100:(count + 1) * 100]
+            for count in range(0, 9):
+                temp_array[count] = ports_dst[count * 100:count * 100 + 99]
                 shuffle(temp_array[count])
                 port_dst_shuffled += temp_array[count]
         else:  # used for port.open
@@ -108,10 +107,35 @@ class PortscanAttack(BaseAttack.BaseAttack):
             port_dst_shuffled = ports_dst
         return port_dst_shuffled
 
-    def generate_attack_pcap(self):
-
-
-
+    def generate_attack_pcap(self, context):
+        def update_timestamp(timestamp, pps, delay=0):
+            """
+            Calculates the next timestamp to be used based on the packet per second rate (pps) and the maximum delay.
+
+            :return: Timestamp to be used for the next packet.
+            """
+            if delay == 0:
+                # Calculate request timestamp
+                # To imitate the bursty behavior of traffic
+                randomdelay = Lea.fromValFreqsDict({1 / pps: 70, 2 / pps: 20, 5 / pps: 7, 10 / pps: 3})
+                return timestamp + uniform(1/pps ,  randomdelay.random())
+            else:
+                # Calculate reply timestamp
+                randomdelay = Lea.fromValFreqsDict({2*delay: 70, 3*delay: 20, 5*delay: 7, 10*delay: 3})
+                return timestamp + uniform(1 / pps + delay,  1 / pps + randomdelay.random())
+
+        def getIntervalPPS(complement_interval_pps, timestamp):
+            """
+            Gets the packet rate (pps) for a specific time interval.
+
+            :param complement_interval_pps: an array of tuples (the last timestamp in the interval, the packet rate in the corresponding interval).
+            :param timestamp: the timestamp at which the packet rate is required.
+            :return: the corresponding packet rate (pps).
+            """
+            for row in complement_interval_pps:
+                if timestamp<=row[0]:
+                    return row[1]
+            return complement_interval_pps[-1][1] # in case the timestamp > capture max timestamp
 
         mac_source = self.get_param_value(Param.MAC_SOURCE)
         mac_destination = self.get_param_value(Param.MAC_DESTINATION)
@@ -255,7 +279,7 @@ class PortscanAttack(BaseAttack.BaseAttack):
 
                 # else: destination port is NOT OPEN -> no reply is sent by target
 
-            pps = max(get_interval_pps(complement_interval_pps, timestamp_next_pkt), 10)
+            pps = max(getIntervalPPS(complement_interval_pps, timestamp_next_pkt),10)
             timestamp_next_pkt = update_timestamp(timestamp_next_pkt, pps)
 
         # store end time of attack

+ 28 - 7
code/Attack/SQLiAttack.py

@@ -1,10 +1,7 @@
 import logging
+from random import randint, uniform
 
-from random import randint
 from lea import Lea
-from scapy.utils import RawPcapReader
-from scapy.layers.inet import Ether
-from ID2TLib.Utility import update_timestamp, get_interval_pps
 
 from Attack import BaseAttack
 from Attack.AttackParameters import Parameter as Param
@@ -12,6 +9,8 @@ from Attack.AttackParameters import ParameterTypes
 
 logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
 # noinspection PyPep8
+from scapy.utils import RawPcapReader
+from scapy.layers.inet import IP, Ether, TCP, RandShort
 
 
 class SQLiAttack(BaseAttack.BaseAttack):
@@ -79,7 +78,29 @@ class SQLiAttack(BaseAttack.BaseAttack):
                              (self.statistics.get_pps_sent(most_used_ip_address) +
                               self.statistics.get_pps_received(most_used_ip_address)) / 2)
 
-    def generate_attack_pcap(self):
+    def generate_attack_pcap(self, context):
+        def update_timestamp(timestamp, pps):
+            """
+            Calculates the next timestamp to be used based on the packet per second rate (pps) and the maximum delay.
+
+            :return: Timestamp to be used for the next packet.
+            """
+            # Calculate the request timestamp
+            # A distribution to imitate the bursty behavior of traffic
+            randomdelay = Lea.fromValFreqsDict({1 / pps: 70, 2 / pps: 20, 5 / pps: 7, 10 / pps: 3})
+            return timestamp + uniform(1 / pps, randomdelay.random())
+
+        def getIntervalPPS(complement_interval_pps, timestamp):
+            """
+            Gets the packet rate (pps) in a specific time interval.
+
+            :return: the corresponding packet rate (pps).
+            """
+            for row in complement_interval_pps:
+                if timestamp <= row[0]:
+                    return row[1]
+            return complement_interval_pps[-1][1]  # in case the timestamp > capture max timestamp
+
         # Timestamp
         timestamp_next_pkt = self.get_param_value(Param.INJECT_AT_TIMESTAMP)
         pps = self.get_param_value(Param.PACKETS_PER_SECOND)
@@ -187,7 +208,7 @@ class SQLiAttack(BaseAttack.BaseAttack):
                     new_pkt = (eth_frame / ip_pkt/ tcp_pkt / str_tcp_seg)
                     new_pkt.time = timestamp_next_pkt
 
-                    pps = max(get_interval_pps(complement_interval_pps, timestamp_next_pkt), 10)
+                    pps = max(getIntervalPPS(complement_interval_pps, timestamp_next_pkt), 10)
                     timestamp_next_pkt = update_timestamp(timestamp_next_pkt, pps) + float(timeSteps.random())
 
                 # Victim --> attacker
@@ -249,7 +270,7 @@ class SQLiAttack(BaseAttack.BaseAttack):
                     new_pkt = (eth_frame / ip_pkt / tcp_pkt / str_tcp_seg)
                     new_pkt.time = timestamp_next_pkt
 
-                    pps = max(get_interval_pps(complement_interval_pps, timestamp_next_pkt), 10)
+                    pps = max(getIntervalPPS(complement_interval_pps, timestamp_next_pkt), 10)
                     timestamp_next_pkt = update_timestamp(timestamp_next_pkt, pps) + float(timeSteps.random())
 
                 # Victim --> attacker

+ 27 - 6
code/Attack/SalityBotnet.py

@@ -1,16 +1,16 @@
 import logging
+from random import randint, uniform
 
-from random import randint
-from scapy.utils import RawPcapReader
-from scapy.layers.inet import Ether
+from lea import Lea
 
 from Attack import BaseAttack
 from Attack.AttackParameters import Parameter as Param
 from Attack.AttackParameters import ParameterTypes
-from ID2TLib.Utility import update_timestamp, get_interval_pps
 
 logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
 # noinspection PyPep8
+from scapy.utils import RawPcapReader
+from scapy.layers.inet import IP, Ether, TCP, RandShort
 
 
 class SalityBotnet(BaseAttack.BaseAttack):
@@ -56,7 +56,28 @@ class SalityBotnet(BaseAttack.BaseAttack):
                              (self.statistics.get_pps_sent(most_used_ip_address) +
                               self.statistics.get_pps_received(most_used_ip_address)) / 2)
 
-    def generate_attack_pcap(self):
+    def generate_attack_pcap(self, context):
+        def update_timestamp(timestamp, pps):
+            """
+            Derives the timestamp of the following packet from the current timestamp and the packet rate.
+
+            The gap added to the timestamp is drawn uniformly between the base gap (1 / pps) and a value
+            sampled from a weighted set of multiples (1x, 2x, 5x, 10x) of that base gap.
+
+            :param timestamp: timestamp of the current packet
+            :param pps: packet rate in packets per second; must be non-zero
+            :return: Timestamp to be used for the next packet.
+            """
+            base_gap = 1 / pps
+            # A distribution to imitate the bursty behavior of traffic
+            burst_delays = Lea.fromValFreqsDict({base_gap: 70, 2 / pps: 20, 5 / pps: 7, 10 / pps: 3})
+            sampled_delay = burst_delays.random()
+            return timestamp + uniform(base_gap, sampled_delay)
+
+        def getIntervalPPS(complement_interval_pps, timestamp):
+            """
+            Looks up the packet rate (pps) that applies to the given timestamp.
+
+            :param complement_interval_pps: sequence of rows; row[0] is compared against the timestamp and
+                                            row[1] is the packet rate that is returned
+            :param timestamp: the timestamp to look up
+            :return: row[1] of the first row with timestamp <= row[0]; if no row matches, row[1] of the last row
+            """
+            try:
+                return next(interval[1] for interval in complement_interval_pps if timestamp <= interval[0])
+            except StopIteration:
+                # in case the timestamp > capture max timestamp, fall back to the last interval
+                return complement_interval_pps[-1][1]
 
         # Timestamp
         timestamp_next_pkt = self.get_param_value(Param.INJECT_AT_TIMESTAMP)
@@ -122,7 +143,7 @@ class SalityBotnet(BaseAttack.BaseAttack):
             new_pkt = (eth_frame / ip_pkt)
             new_pkt.time = timestamp_next_pkt
 
-            pps = max(get_interval_pps(complement_interval_pps, timestamp_next_pkt), 10)
+            pps = max(getIntervalPPS(complement_interval_pps, timestamp_next_pkt), 10)
             timestamp_next_pkt = update_timestamp(timestamp_next_pkt, pps)
 
             packets.append(new_pkt)

+ 3 - 4
code/ID2TLib/AttackController.py

@@ -4,7 +4,6 @@ import sys
 from Attack.AttackParameters import Parameter
 from ID2TLib import LabelManager
 from ID2TLib import Statistics
-from ID2TLib.Label import Label
 from ID2TLib.PcapFile import PcapFile
 
 
@@ -40,7 +39,7 @@ class AttackController:
         # Record the attack
         self.added_attacks.append(self.current_attack)
 
-    def process_attack(self, attack: str, params: str):
+    def process_attack(self, attack: str, params: str, context):
         """
         Takes as input the name of an attack (classname) and the attack parameters as string. Parses the string of
         attack parameters, creates the attack by writing the attack packets and returns the path of the written pcap.
@@ -81,11 +80,11 @@ class AttackController:
         # Write attack into pcap file
         print("Generating attack packets...", end=" ")
         sys.stdout.flush()  # force python to print text immediately
-        total_packets, temp_attack_pcap_path = self.current_attack.generate_attack_pcap()
+        total_packets, temp_attack_pcap_path = self.current_attack.generate_attack_pcap(context)
         print("done. (total: " + str(total_packets) + " pkts.)")
 
         # Store label into LabelManager
-        l = Label(attack, self.get_attack_start_utime(),
+        l = LabelManager.Label(attack, self.get_attack_start_utime(),
                   self.get_attack_end_utime(), attack_note)
         self.label_mgr.add_labels(l)
 

+ 49 - 124
code/ID2TLib/Controller.py

@@ -1,21 +1,26 @@
 import os
 import sys
-import readline
+import shutil
 
 from ID2TLib.AttackController import AttackController
 from ID2TLib.LabelManager import LabelManager
 from ID2TLib.PcapFile import PcapFile
 from ID2TLib.Statistics import Statistics
+from ID2TLib.AttackContext import AttackContext
 
 
 class Controller:
-    def __init__(self, pcap_file_path: str, do_extra_tests: bool):
+    def __init__(self, in_pcap_file_path: str, do_extra_tests: bool, out_pcap_file_path):
         """
         Creates a new Controller, acting as a central coordinator for the whole application.
         :param pcap_file_path:
         """
         # Fields
-        self.pcap_src_path = pcap_file_path.strip()
+        self.pcap_src_path = in_pcap_file_path.strip()
+        if out_pcap_file_path:
+            self.pcap_out_path = out_pcap_file_path.strip()
+        else:
+            self.pcap_out_path = None
         self.pcap_dest_path = ''
         self.written_pcaps = []
         self.do_extra_tests = do_extra_tests
@@ -49,9 +54,23 @@ class Controller:
         input dataset.
         :param attacks_config: A list of attacks with their attack parameters.
         """
+
+        # get output directory
+        if self.pcap_out_path:
+            out_dir = os.path.dirname(self.pcap_out_path)
+        else:
+            out_dir = os.path.dirname(self.pcap_src_path)
+        # if out_dir is cwd
+        if out_dir == "":
+            out_dir = "."
+
+        # context for the attack(s)
+        context = AttackContext(out_dir)
+
+        # note if new xml file has been created by MembersMgmtCommAttack
         # load attacks sequentially
         for attack in attacks_config:
-            temp_attack_pcap = self.attack_controller.process_attack(attack[0], attack[1:])
+            temp_attack_pcap = self.attack_controller.process_attack(attack[0], attack[1:], context)
             self.written_pcaps.append(temp_attack_pcap)
 
         # merge attack pcaps to get single attack pcap
@@ -62,7 +81,6 @@ class Controller:
                 attacks_pcap = PcapFile(self.written_pcaps[i])
                 attacks_pcap_path = attacks_pcap.merge_attack(self.written_pcaps[i + 1])
                 os.remove(self.written_pcaps[i + 1])  # remove merged pcap
-                self.written_pcaps[i + 1] = attacks_pcap_path
             print("done.")
         else:
             attacks_pcap_path = self.written_pcaps[0]
@@ -70,7 +88,15 @@ class Controller:
         # merge single attack pcap with all attacks into base pcap
         print("Merging base pcap with single attack pcap...", end=" ")
         sys.stdout.flush()  # force python to print text immediately
+
+        # cp merged PCAP to output path
         self.pcap_dest_path = self.pcap_file.merge_attack(attacks_pcap_path)
+        if self.pcap_out_path:
+            if not self.pcap_out_path.endswith(".pcap"):
+                self.pcap_out_path += ".pcap"
+            os.rename(self.pcap_dest_path, self.pcap_out_path)
+            self.pcap_dest_path = self.pcap_out_path
+
         print("done.")
 
         # delete intermediate PCAP files
@@ -82,8 +108,23 @@ class Controller:
         # write label file with attacks
         self.label_manager.write_label_file(self.pcap_dest_path)
 
+        # pcap_base contains the name of the pcap-file without the ".pcap" extension
+        pcap_base = os.path.splitext(self.pcap_dest_path)[0]
+        created_files = [self.pcap_dest_path, self.label_manager.label_file_path]
+        for suffix, filename in context.get_allocated_files():
+            shutil.move(filename, pcap_base + suffix)
+            created_files.append(pcap_base + suffix)
+        context.reset()
+
         # print status message
-        print('\nOutput files created: \n', self.pcap_dest_path, '\n', self.label_manager.label_file_path)
+        created_files += context.get_other_created_files()
+        created_files.sort()
+        print("\nOutput files created:")
+        for file in created_files:
+            # remove ./ at beginning of file to have only one representation for cwd
+            if file.startswith("./"):
+                file = file[2:]
+            print(file)
 
     def process_db_queries(self, query, print_results=False):
         """
@@ -99,109 +140,13 @@ class Controller:
         else:
             self.statisticsDB.process_db_query(query, print_results)
 
-    @staticmethod
-    def process_help(params):
-        if not params:
-            print("Query mode allows you to enter SQL-queries as well as named queries.")
-            print()
-            print("Named queries:")
-            print("\tSelectors:")
-            print("\t\tmost_used(...)  -> Returns the most occurring element in all elements")
-            print("\t\tleast_used(...) -> Returns the least occurring element in all elements")
-            print("\t\tavg(...)        -> Returns the average of all elements")
-            print("\t\tall(...)        -> Returns all elements")
-            print("\tExtractors:")
-            print("\t\trandom(...)     -> Returns a random element from a list")
-            print("\t\tfirst(...)      -> Returns the first element from a list")
-            print("\t\tlast(...)       -> Returns the last element from a list")
-            print("\tParameterized selectors:")
-            print("\t\tipAddress(...)  -> Returns all IP addresses fulfilling the specified conditions")
-            print("\t\tmacAddress(...) -> Returns all MAC addresses fulfilling the specified conditions")
-            print()
-            print("Miscellaneous:")
-            print("\tlabels            -> List all attacks listed in the label file, if any")
-            print()
-            print("Additional information is available with 'help [KEYWORD];'")
-            print("To get a list of examples, type 'help examples;'")
-            print()
-            return
-
-        param = params[0].lower()
-        if param == "most_used":
-            print("most_used can be used as a selector for the following attributes:")
-            print("ipAddress | macAddress | portNumber | protocolName | ttlValue | mssValue | winSize | ipClass")
-            print()
-        elif param == "least_used":
-            print("least_used can be used as a selector for the following attributes:")
-            print("ipAddress | macAddress | portNumber | protocolName | ttlValue")
-            print()
-        elif param == "avg":
-            print("avg can be used as a selector for the following attributes:")
-            print("pktsReceived | pktsSent | kbytesSent | kbytesReceived | ttlValue | mss")
-            print()
-        elif param == "all":
-            print("all can be used as a selector for the following attributes:")
-            print("ipAddress | ttlValue | mss | macAddress | portNumber | protocolName")
-            print()
-        elif param in ["random", "first", "last"]:
-            print("No additional info available for this keyword.")
-            print()
-        elif param == "ipaddress":
-            print("ipAddress is a parameterized selector which fetches IP addresses based on (a list of) conditions.")
-            print("Conditions are of the following form: PARAMETER OPERATOR VALUE")
-            print("The following parameters can be specified:")
-            print("pktsReceived | pktsSent | kbytesReceived | kbytesSent | maxPktRate | minPktRate | ipClass\n"
-                  "macAddress | ttlValue | ttlCount | portDirection | portNumber | portCount | protocolCount\n"
-                  "protocolName")
-            print()
-            print("See 'help examples;' for usage examples.")
-            print()
-        elif param == "macaddress":
-            print("macAddress is a parameterized selector which fetches MAC addresses based on (a list of) conditions.")
-            print("Conditions are of the following form: PARAMETER OPERATOR VALUE")
-            print("The following parameters can be specified:")
-            print("ipAddress")
-            print()
-            print("See 'help examples;' for usage examples.")
-            print()
-        elif param == "examples":
-            print("Get the average amount of sent packets per IP:")
-            print("\tavg(pktsSent);")
-            print("Get a random IP from all addresses occuring in the pcap:")
-            print("\trandom(all(ipAddress));")
-            print("Return the MAC address of a specified IP:")
-            print("\tmacAddress(ipAddress=192.168.178.2);")
-            print("Get the average TTL-value with SQL:")
-            print("\tSELECT avg(ttlValue) from ip_ttl;")
-            print("Get a random IP address from all addresses that sent and received at least 10 packets:")
-            print("\trandom(ipAddress(pktsSent > 10, pktsReceived > 10));")
-            print()
-        else:
-            print("Unknown keyword '" + param + "', try 'help;' to get a list of allowed keywords'")
-            print()
-
     def enter_query_mode(self):
         """
         Enters into the query mode. This is a read-eval-print-loop, where the user can input named queries or SQL
         queries and the results are printed.
         """
-
-        def make_completer(vocabulary):
-            def custom_template(text, state):
-                results = [x for x in vocabulary if x.startswith(text)] + [None]
-                return results[state]
-            return custom_template
-
-        readline.parse_and_bind('tab: complete')
-        readline.set_completer(make_completer(self.statisticsDB.get_all_named_query_keywords()+self.statisticsDB.get_all_sql_query_keywords()))
-        history_file = os.path.join(os.path.expanduser('~'), 'ID2T_data', 'query_history')
-        try:
-            readline.read_history_file(history_file)
-        except IOError:
-            pass
         print("Entering into query mode...")
-        print("Enter statement ending by ';' and press ENTER to send query. Exit by sending an empty query.")
-        print("Type 'help;' for information on possible queries.")
+        print("Enter statement ending by ';' and press ENTER to send query. Exit by sending an empty query..")
         buffer = ""
         while True:
             line = input("> ")
@@ -212,31 +157,11 @@ class Controller:
             if sqlite3.complete_statement(buffer):
                 try:
                     buffer = buffer.strip()
-                    if buffer.lower().startswith('help'):
-                        buffer = buffer.strip(';')
-                        self.process_help(buffer.split(' ')[1:])
-                    elif buffer.lower().strip() == 'labels;':
-                        if not self.label_manager.labels:
-                            print("No labels found.")
-                        else:
-                            print("Attacks listed in the label file:")
-                            print()
-                            for label in self.label_manager.labels:
-                                print("Attack name:     " + str(label.attack_name))
-                                print("Attack note:     " + str(label.attack_note))
-                                print("Start timestamp: " + str(label.timestamp_start))
-                                print("End timestamp:   " + str(label.timestamp_end))
-                                print()
-                        print()
-                    else:
-                        self.statisticsDB.process_db_query(buffer, True)
+                    self.statisticsDB.process_db_query(buffer, True)
                 except sqlite3.Error as e:
                     print("An error occurred:", e.args[0])
                 buffer = ""
 
-        readline.set_history_length(1000)
-        readline.write_history_file(history_file)
-
     def create_statistics_plot(self, params: str):
         """
         Plots the statistics to a file by using the given customization parameters.

+ 36 - 4
code/ID2TLib/LabelManager.py

@@ -2,7 +2,38 @@ import os.path
 from datetime import datetime
 from xml.dom.minidom import *
 
-import ID2TLib.Label as Label
+from functools import total_ordering
+
+@total_ordering
+class Label:
+    """
+    Label describing one injected attack: its name, an optional note and its start/end timestamps.
+
+    Labels are compared and ordered by their start timestamp only.
+    """
+
+    def __init__(self, attack_name, timestamp_start, timestamp_end, attack_note=""):
+        """
+        Creates a new attack label
+
+        :param attack_name: The name of the associated attack
+        :param timestamp_start: The timestamp as unix time of the first attack packet
+        :param timestamp_end: The timestamp as unix time of the last attack packet
+        :param attack_note: A note associated to the attack (optional)
+        """
+        self.attack_name = attack_name
+        self.timestamp_start = timestamp_start
+        self.timestamp_end = timestamp_end
+        self.attack_note = attack_note
+
+    def __eq__(self, other):
+        """
+        Two labels are equal if they start at the same time.
+
+        :param other: the object to compare with
+        :return: True/False for labels, NotImplemented for any other type
+        """
+        if not isinstance(other, Label):
+            return NotImplemented
+        # compare on the same key that __lt__ and __gt__ use; the class has no 'timestamp' attribute
+        return self.timestamp_start == other.timestamp_start
+
+    def __lt__(self, other):
+        """Orders labels by their start timestamp."""
+        return self.timestamp_start < other.timestamp_start
+
+    def __gt__(self, other):
+        """Orders labels by their start timestamp."""
+        return self.timestamp_start > other.timestamp_start
+
+    def __str__(self):
+        """Returns the label as '(name,note,start,end)'."""
+        return ''.join(
+            ['(', self.attack_name, ',', self.attack_note, ',', str(self.timestamp_start), ',', str(self.timestamp_end),
+             ')'])
+
 
 
 class LabelManager:
@@ -28,7 +59,8 @@ class LabelManager:
         self.labels = list()
 
         if filepath_pcap is not None:
-            self.label_file_path = filepath_pcap.strip('.pcap') + '_labels.xml'
+            # splitext gives us the filename without extension
+            self.label_file_path = os.path.splitext(filepath_pcap)[0] + '_labels.xml'
             # only load labels if label file is existing
             if os.path.exists(self.label_file_path):
                 self.load_labels()
@@ -83,7 +115,7 @@ class LabelManager:
             return timestamp_root
 
         if filepath is not None:
-            self.label_file_path = filepath.strip('.pcap') + '_labels.xml'
+            self.label_file_path = os.path.splitext(filepath)[0] + '_labels.xml' # splitext removes the file extension
 
         # Generate XML
         doc = Document()
@@ -162,7 +194,7 @@ class LabelManager:
             attack_note = get_value_from_node(a, self.TAG_ATTACK_NOTE, 0)
             timestamp_start = get_value_from_node(a, self.TAG_TIMESTAMP_START, 1, 0)
             timestamp_end = get_value_from_node(a, self.TAG_TIMESTAMP_END, 1, 0)
-            label = Label.Label(attack_name, float(timestamp_start), float(timestamp_end), attack_note)
+            label = Label(attack_name, float(timestamp_start), float(timestamp_end), attack_note)
             self.labels.append(label)
             count_labels += 1
 

+ 338 - 13
code/ID2TLib/Statistics.py

@@ -10,6 +10,7 @@ matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 from ID2TLib.PcapFile import PcapFile
 from ID2TLib.StatsDatabase import StatsDatabase
+from ID2TLib.IPv4 import IPAddress
 
 
 class Statistics:
@@ -499,12 +500,6 @@ class Statistics:
         result_dict = {key: value for (key, value) in result}
         return result_dict
 
-    def get_ip_address_count(self):
-        return self.process_db_query("SELECT COUNT(*) FROM ip_statistics")
-
-    def get_ip_addresses(self):
-        return self.process_db_query("SELECT ipAddress FROM ip_statistics")
-
     def get_random_ip_address(self, count: int = 1):
         """
         :param count: The number of IP addreses to return
@@ -519,13 +514,6 @@ class Statistics:
                 ip_address_list.append(self.process_db_query("random(all(ipAddress))"))
             return ip_address_list
 
-    def get_ip_address_from_mac(self, macAddress: str):
-        """
-        :param macAddress: the MAC address of which the IP shall be returned, if existing in DB
-        :return: the IP address used in the dataset by a given MAC address
-        """
-        return self.process_db_query('ipAddress(macAddress=' + macAddress + ")")
-
     def get_mac_address(self, ipAddress: str):
         """
         :return: The MAC address used in the dataset for the given IP address.
@@ -557,6 +545,154 @@ class Statistics:
         else:
             return None
 
+    def get_in_degree(self):
+        """
+        Determines the in-degree per ipAddress, i.e. for every IP the number of distinct ipAddresses that are
+        its conversation partners on one of its incoming ports (portDirection 'in').
+
+        :return: a list, each entry consists of one IPAddress and its associated in-degree
+        """
+        query = (
+            "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'in\' AND portNumber = portA GROUP BY ipAddress "
+            "UNION "
+            "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'in\' AND portNumber = portB GROUP BY ipAddress")
+        raw_rows = self.stats_db._process_user_defined_query(query)
+
+        # An IP address can show up twice (once as endpoint A, once as endpoint B), therefore merge those rows
+        return self.filter_multiples(raw_rows)
+
+    def get_out_degree(self):
+        """
+        Determines the out-degree per ipAddress, i.e. for every IP the number of distinct ipAddresses that are
+        its conversation partners on one of its outgoing ports (portDirection 'out').
+
+        :return: a list, each entry consists of one IPAddress and its associated out-degree
+        """
+        out_degree_raw = self.stats_db._process_user_defined_query(
+                "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA GROUP BY ipAddress " +
+                "UNION " +
+                "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB GROUP BY ipAddress")
+
+        # An IP address can show up twice (once as endpoint A, once as endpoint B), therefore accumulate the counts
+        out_degree = self.filter_multiples(out_degree_raw)
+
+        return out_degree
+
+    def get_avg_delay_local_ext(self):
+        """
+        Calculates the average delay for local and for external conversations from the avgDelay column of
+        conv_statistics. A conversation counts as local only if both of its IP addresses are private.
+
+        The averages are scaled by 0.001 (the stored values are presumably milliseconds - confirm against the
+        code that fills conv_statistics). If a group has no conversations, a fixed default is used instead:
+        0.06 for local and 0.15 for external.
+
+        :return: tuple consisting of avg delay for local and external communication, (local, external)
+        """
+        # fallback values for a group without any conversation
+        fallback_local, fallback_external = 0.06, 0.15
+
+        rows = self.stats_db._process_user_defined_query("SELECT ipAddressA, ipAddressB, avgDelay FROM conv_statistics")
+        if not rows:
+            # no statistics were found at all
+            return fallback_local, fallback_external
+
+        # split the delays into local and external conversations
+        local_delays, external_delays = [], []
+        for row in rows:
+            endpoint_a = IPAddress.parse(row[0])
+            endpoint_b = IPAddress.parse(row[1])
+            if endpoint_a.is_private() and endpoint_b.is_private():
+                local_delays.append(row[2])
+            else:
+                external_delays.append(row[2])
+
+        # average = sum of the delays divided by the number of conversations, then scaled
+        if local_delays:
+            avg_delay_local = (sum(local_delays, 0.0) / len(local_delays)) * 0.001
+        else:
+            avg_delay_local = fallback_local
+
+        if external_delays:
+            avg_delay_external = (sum(external_delays, 0.0) / len(external_delays)) * 0.001
+        else:
+            avg_delay_external = fallback_external
+
+        return avg_delay_local, avg_delay_external
+
+    def filter_multiples(self, entries):
+        """
+        helper function, for get_out_degree and get_in_degree
+        merges all entries that share the same ipAddress into a single entry holding the sum of their values
+
+        Works for any number of occurrences of an address; the result keeps the addresses in the order of
+        their first occurrence.
+
+        :param entries: list, each entry consists of an ipAddress and a numeric value
+        :return: a list of (ipAddress, accumulated value) tuples, without duplicate ipAddresses
+        """
+
+        totals = {}
+        first_seen_order = []
+        for entry in entries:
+            address, value = entry[0], entry[1]
+            if address in totals:
+                totals[address] += value
+            else:
+                # remember the position of the first occurrence to keep the output order stable
+                first_seen_order.append(address)
+                totals[address] = value
+
+        return [(address, totals[address]) for address in first_seen_order]
+
 
     def get_statistics_database(self):
         """
@@ -938,6 +1074,190 @@ class Statistics:
                 plt.savefig(out, dpi=500)
                 return out
 
+        def plot_packets_per_connection(file_ending: str):
+            """
+            Plots the packet count (pktsCount) of every row in conv_statistics_stateless as horizontal bar plot,
+            one bar per connection labelled with both endpoints as 'ip:port'.
+            Included are 'half-open' connections, where only one packet is exchanged.
+            Note: there may be cutoff problems within the plot if there is too little data.
+
+            :param file_ending: The file extension for the output file containing the plot
+            :return: A filepath to the file containing the created plot, or None (after printing an error message)
+                     if the query returned no rows
+            """
+            plt.gcf().clear()
+            result = self.stats_db._process_user_defined_query(
+                "SELECT ipAddressA, portA, ipAddressB, portB, pktsCount FROM conv_statistics_stateless")
+
+            if (result):
+                graphy, graphx = [], []
+                # sort ascending by packet count (row[4]); bars are drawn in this order starting at y = 0
+                result = sorted(result, key=lambda row: row[4])
+
+                # compute plot data
+                for i, row in enumerate(result):
+                    addr1, addr2 = "%s:%d" % (row[0], row[1]), "%s:%d" % (row[2], row[3])
+                    # pad both endpoint strings to the same length to improve appearance
+                    len_max = max(len(addr1), len(addr2))
+                    addr1 = addr1.ljust(len_max)
+                    addr2 = addr2.ljust(len_max)
+                    # add plot data: two-line label (endpoint A above endpoint B) and the packet count
+                    graphy.append("%s\n%s" % (addr1, addr2))
+                    graphx.append(row[4])
+
+                # compute plot size in inches: height grows with the number of bars, width with the largest count
+                dist_mult_height, dist_mult_width = 0.55, 0.07  # these values turned out to work well
+                plt_height, plt_width = len(graphy) * dist_mult_height, max(graphx) * dist_mult_width
+                title_distance = 1 + 0.012*52.8/plt_height  # originally, a good title distance turned out to be 1.012 with a plot height of 52.8
+
+                # have x axis and its label appear at the top (instead of bottom)
+                fig, ax = plt.subplots()
+                ax.xaxis.tick_top()
+                ax.xaxis.set_label_position("top")
+
+                # set additional plot parameters
+                plt.title("Sent packets per connection", y=title_distance)
+                plt.xlabel('Number of Packets')
+                plt.ylabel('Connection')
+                width = 0.5
+                plt.grid(True)
+                plt.gca().margins(y=0)  # removes the space between data and x-axis within the plot
+                plt.gcf().set_size_inches(plt_width, plt_height)  # set plot size
+
+                # plot the above data, first use plain numbers as graphy to maintain sorting
+                plt.barh(range(len(graphy)), graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
+                # now change the y numbers to the respective address labels
+                plt.yticks(range(len(graphy)), graphy)
+                # try to use tight layout to cut off unnecessary space; a ValueError is deliberately ignored
+                try:
+                    plt.tight_layout(pad=4)
+                except ValueError:
+                    pass
+
+                # save created figure; the output path is the pcap path with '.pcap' replaced by the plot suffix
+                out = self.pcap_filepath.replace('.pcap', '_plot-PktCount per Connection Distribution' + file_ending)
+                plt.savefig(out, dpi=500)
+                return out
+            else:
+                # NOTE(review): the message mentions "protocol" although this plot is about packets per connection
+                print("Error plot protocol: No protocol values found!")
+
+        def plot_out_degree(file_ending: str):
+            """
+            Plots the out-degree returned by get_out_degree() as vertical bar plot, one bar per IP address.
+
+            :param file_ending: The file extension for the output file containing the plot
+            :return: A filepath to the file containing the created plot
+            """
+            plt.gcf().clear()
+            out_degree = self.get_out_degree()
+
+            # split the (ipAddress, out-degree) entries into x labels and bar heights
+            graphx, graphy = [], []
+            for entry in out_degree:
+                graphx.append(entry[0])
+                graphy.append(entry[1])
+            plt.autoscale(enable=True, axis='both')
+            plt.title("Outdegree")
+            plt.xlabel('IpAddress')
+            plt.ylabel('Outdegree')
+            width = 0.1
+            plt.xlim([0, len(graphx)])
+            plt.grid(True)
+
+            # place the bars at the positions 0..n-1 and label each position with its IP address
+            x = range(0,len(graphx))
+            my_xticks = graphx
+            plt.xticks(x, my_xticks)
+
+            plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
+            # the output path is the pcap path with '.pcap' replaced by the plot suffix
+            out = self.pcap_filepath.replace('.pcap', '_out_degree' + file_ending)
+            plt.savefig(out,dpi=500)
+            return out
+
+        def plot_in_degree(file_ending: str):
+            """
+            Plots the in-degree returned by get_in_degree() as vertical bar plot, one bar per IP address.
+
+            :param file_ending: The file extension for the output file containing the plot
+            :return: A filepath to the file containing the created plot
+            """
+            plt.gcf().clear()
+            in_degree = self.get_in_degree()
+
+            # split the (ipAddress, in-degree) entries into x labels and bar heights
+            graphx, graphy = [], []
+            for entry in in_degree:
+                graphx.append(entry[0])
+                graphy.append(entry[1])
+            plt.autoscale(enable=True, axis='both')
+            plt.title("Indegree")
+            plt.xlabel('IpAddress')
+            plt.ylabel('Indegree')
+            width = 0.1
+            plt.xlim([0, len(graphx)])
+            plt.grid(True)
+
+            # place the bars at the positions 0..n-1 and label each position with its IP address
+            x = range(0,len(graphx))
+            my_xticks = graphx
+            plt.xticks(x, my_xticks)
+
+            plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
+            # the output path is the pcap path with '.pcap' replaced by the plot suffix
+            out = self.pcap_filepath.replace('.pcap', '_in_degree' + file_ending)
+            plt.savefig(out,dpi=500)
+            return out
+
+        def plot_avgpkts_per_comm_interval(file_ending: str):
+            """
+            Plots the average packet count (avgPktCount) of every row in comm_interval_statistics as horizontal
+            bar plot, one bar per connection labelled with both endpoints as 'ip:port'.
+            Note: there may be cutoff problems within the plot if there is too little data.
+
+            :param file_ending: The file extension for the output file containing the plot
+            :return: A filepath to the file containing the created plot, or None (after printing an error message)
+                     if the query returned no rows
+            """
+            plt.gcf().clear()
+            result = self.stats_db._process_user_defined_query(
+                "SELECT ipAddressA, portA, ipAddressB, portB, avgPktCount FROM comm_interval_statistics")
+
+            if (result):
+                graphy, graphx = [], []
+                # sort ascending by average packet count (row[4]); bars are drawn in this order starting at y = 0
+                result = sorted(result, key=lambda row: row[4])
+
+                # compute plot data
+                for i, row in enumerate(result):
+                    addr1, addr2 = "%s:%d" % (row[0], row[1]), "%s:%d" % (row[2], row[3])
+                    # pad both endpoint strings to the same length to improve appearance
+                    len_max = max(len(addr1), len(addr2))
+                    addr1 = addr1.ljust(len_max)
+                    addr2 = addr2.ljust(len_max)
+                    # add plot data: two-line label (endpoint A above endpoint B) and the average packet count
+                    graphy.append("%s\n%s" % (addr1, addr2))
+                    graphx.append(row[4])
+
+                # compute plot size in inches: height grows with the number of bars, width with the largest value
+                dist_mult_height, dist_mult_width = 0.55, 0.07  # these values turned out to work well
+                plt_height, plt_width = len(graphy) * dist_mult_height, max(graphx) * dist_mult_width
+                title_distance = 1 + 0.012*52.8/plt_height  # originally, a good title distance turned out to be 1.012 with a plot height of 52.8
+
+                # have x axis and its label appear at the top (instead of bottom)
+                fig, ax = plt.subplots()
+                ax.xaxis.tick_top()
+                ax.xaxis.set_label_position("top")
+
+                # set additional plot parameters
+                plt.title("Average number of packets per communication interval", y=title_distance)
+                plt.xlabel('Number of Packets')
+                plt.ylabel('Connection')
+                width = 0.5
+                plt.grid(True)
+                plt.gca().margins(y=0)  # removes the space between data and x-axis within the plot
+                plt.gcf().set_size_inches(plt_width, plt_height)  # set plot size
+
+                # plot the above data, first use plain numbers as graphy to maintain sorting
+                plt.barh(range(len(graphy)), graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
+                # now change the y numbers to the respective address labels
+                plt.yticks(range(len(graphy)), graphy)
+                # try to use tight layout to cut off unnecessary space; a ValueError is deliberately ignored
+                try:
+                    plt.tight_layout(pad=4)
+                except ValueError:
+                    pass
+
+                # save created figure; the output path is the pcap path with '.pcap' replaced by the plot suffix
+                out = self.pcap_filepath.replace('.pcap', '_plot-Avg PktCount Communication Interval Distribution' + file_ending)
+                plt.savefig(out, dpi=500)
+                return out
+            else:
+                # NOTE(review): the message mentions "protocol" although this plot is about communication intervals
+                print("Error plot protocol: No protocol values found!")
+
+
         ttl_out_path = plot_ttl('.' + format)
         mss_out_path = plot_mss('.' + format)
         win_out_path = plot_win('.' + format)
@@ -953,6 +1273,10 @@ class Statistics:
         plot_interval_new_tos = plot_interval_new_tos('.' + format)
         plot_interval_new_win_size = plot_interval_new_win_size('.' + format)
         plot_interval_new_mss = plot_interval_new_mss('.' + format)
+        plot_packets_per_connection_out = plot_packets_per_connection('.' + format)
+        plot_out_degree = plot_out_degree('.' + format)
+        plot_in_degree = plot_in_degree('.' + format)
+        plot_avgpkts_per_comm_interval_out = plot_avgpkts_per_comm_interval('.' + format)
 
         ## Time consuming plot
         # port_out_path = plot_port('.' + format)
@@ -961,3 +1285,4 @@ class Statistics:
         # ip_dst_out_path = plot_ip_dst('.' + format)
 
         print("Saved plots in the input PCAP directory.")
+        print("In-/Out-/Overall-degree plots not fully finished yet")

+ 1 - 1
code/ID2TLib/StatsDatabase.py

@@ -298,7 +298,7 @@ class StatsDatabase:
 
         # Print results if option print_results is True
         if print_results:
-            if isinstance(result, list) and len(result) == 1:
+            if len(result) == 1 and isinstance(result, list):
                 result = result[0]
                 print("Query returned 1 record:\n")
                 for i in range(0, len(result)):

+ 23 - 18
code_boost/src/cxx/pcap_processor.cpp

@@ -52,18 +52,10 @@ std::string pcap_processor::merge_pcaps(const std::string pcap_path) {
     std::string new_filepath = filePath;
     const std::string &newExt = "_" + tstmp + ".pcap";
     std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
-
-    if ((filePath.length() + newExt.length()) < 250) {
-
-        if (h != std::string::npos) {
-            new_filepath.replace(h, newExt.length(), newExt);
-        } else {
-            new_filepath.append(newExt);
-        }
-    }
-
-    else {
-        new_filepath = (new_filepath.substr(0, new_filepath.find('_'))).append(newExt);
+    if (h != std::string::npos) {
+        new_filepath.replace(h, newExt.length(), newExt);
+    } else {
+        new_filepath.append(newExt);
     }
 
     FileSniffer sniffer_base(filePath);
@@ -166,6 +158,9 @@ void pcap_processor::collect_statistics() {
         
         // Save timestamp of last packet into statistics
         stats.setTimestampLastPacket(currentPktTimestamp);
+
+        // Create the communication interval statistics from all gathered communication intervals
+        stats.createCommIntervalStats();
     }
 }
 
@@ -252,15 +247,19 @@ void pcap_processor::process_packets(const Packet &pkt) {
         if (p == PDU::PDUType::TCP) {
             TCP tcpPkt = (const TCP &) *pdu_l4;
             
-          // Check TCP checksum
-          if (pdu_l3_type == PDU::PDUType::IP) {
-            stats.checkTCPChecksum(ipAddressSender, ipAddressReceiver, tcpPkt);
-          }
+            // Check TCP checksum
+            if (pdu_l3_type == PDU::PDUType::IP) {
+              stats.checkTCPChecksum(ipAddressSender, ipAddressReceiver, tcpPkt);
+            }
 
             stats.incrementProtocolCount(ipAddressSender, "TCP");                        
 
             // Conversation statistics
             stats.addConvStat(ipAddressSender, tcpPkt.sport(), ipAddressReceiver, tcpPkt.dport(), pkt.timestamp());
+            stats.addConvStatStateless(ipAddressSender, tcpPkt.sport(), ipAddressReceiver, tcpPkt.dport(), pkt.timestamp()); 
+
+            // Communication interval data collection for the later created communication statistics
+            stats.addCommInterval(ipAddressSender, tcpPkt.sport(), ipAddressReceiver, tcpPkt.dport(), pkt.timestamp());
 
             // Window Size distribution
             int win = tcpPkt.window();
@@ -279,8 +278,14 @@ void pcap_processor::process_packets(const Packet &pkt) {
           // UDP Packet
         } else if (p == PDU::PDUType::UDP) {
             const UDP udpPkt = (const UDP &) *pdu_l4;
-            stats.incrementProtocolCount(ipAddressSender, "UDP");            
-            stats.incrementPortCount(ipAddressSender, udpPkt.sport(), ipAddressReceiver, udpPkt.dport());                        
+            stats.incrementProtocolCount(ipAddressSender, "UDP");   
+
+            // Conversation statistics
+            stats.addConvStatStateless(ipAddressSender, udpPkt.sport(), ipAddressReceiver, udpPkt.dport(), pkt.timestamp());           
+            stats.incrementPortCount(ipAddressSender, udpPkt.sport(), ipAddressReceiver, udpPkt.dport());      
+
+            // Communication interval data collection for the later created communication statistics
+            stats.addCommInterval(ipAddressSender, udpPkt.sport(), ipAddressReceiver, udpPkt.dport(), pkt.timestamp());       
           
         } else if (p == PDU::PDUType::ICMP) {
             stats.incrementProtocolCount(ipAddressSender, "ICMP");

+ 114 - 39
code_boost/src/cxx/statistics.cpp

@@ -247,6 +247,118 @@ void statistics::addConvStat(std::string ipAddressSender,int sport,std::string i
     }
 }
 
+/**
+ * Records one packet for a stateless conversation, i.e. a pair of (IP address, port) endpoints
+ * that is tracked independently of the packet direction.
+ * If an entry keyed by the reversed tuple (receiver, dport, sender, sport) already exists, the packet
+ * is accounted to that entry; otherwise it is accounted to the entry keyed by
+ * (sender, sport, receiver, dport), which is created on first use.
+ * For the chosen entry the packet counter is incremented and the timestamp is appended. While the
+ * counter is at most 3, the gap to the previously stored timestamp is appended to the inter-arrival times.
+ * @param ipAddressSender The sender IP address.
+ * @param sport The source port.
+ * @param ipAddressReceiver The receiver IP address.
+ * @param dport The destination port.
+ * @param timestamp The timestamp of the packet.
+ */
+void statistics::addConvStatStateless(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp){
+    const conv reversed = {ipAddressReceiver, dport, ipAddressSender, sport};
+
+    // The conversation is already known with the receiver as endpoint A: update that entry.
+    auto known = conv_statistics_stateless.find(reversed);
+    if (known != conv_statistics_stateless.end()) {
+        auto &stat = known->second;
+        stat.pkts_count++;
+        if (stat.pkts_count <= 3)
+            stat.interarrival_time.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - stat.pkts_timestamp.back()));
+        stat.pkts_timestamp.push_back(timestamp);
+        return;
+    }
+
+    // Otherwise use (and create if necessary) the entry with the sender as endpoint A.
+    const conv forward = {ipAddressSender, sport, ipAddressReceiver, dport};
+    auto &stat = conv_statistics_stateless[forward];
+    stat.pkts_count++;
+    // a freshly created entry has no previous timestamp, hence no inter-arrival time yet
+    if (!stat.pkts_timestamp.empty() && stat.pkts_count <= 3)
+        stat.interarrival_time.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - stat.pkts_timestamp.back()));
+    stat.pkts_timestamp.push_back(timestamp);
+}
+
+/**
+ * Accounts a packet to the communication intervals of its (stateless) conversation.
+ * The conversation is looked up under both directions of the (IP, port, IP, port) tuple. If it is
+ * unknown, a list holding a single one-packet interval is stored under the reversed tuple
+ * (receiver, dport, sender, sport). If it is known and the packet arrives more than
+ * COMM_INTERVAL_THRESHOLD microseconds after the end of the latest interval, a new one-packet
+ * interval is appended; otherwise the latest interval is extended to this timestamp and its
+ * packet count is increased by 1.
+ * @param ipAddressSender The sender IP address.
+ * @param sport The source port.
+ * @param ipAddressReceiver The receiver IP address.
+ * @param dport The destination port.
+ * @param timestamp The timestamp of the packet.
+ */
+void statistics::addCommInterval(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp){
+    const conv reversed = {ipAddressReceiver, dport, ipAddressSender, sport};
+
+    // look for the conversation under either direction, trying the reversed key first
+    auto known = comm_intervals.find(reversed);
+    if (known == comm_intervals.end()) {
+        const conv forward = {ipAddressSender, sport, ipAddressReceiver, dport};
+        known = comm_intervals.find(forward);
+    }
+
+    // first packet of this conversation: start its list with an initial interval
+    if (known == comm_intervals.end()) {
+        commInterval first_interval = {timestamp, timestamp, 1};
+        std::vector<commInterval> fresh_list;
+        fresh_list.push_back(first_interval);
+        comm_intervals[reversed] = fresh_list;
+        return;
+    }
+
+    std::vector<commInterval> &conv_intervals = known->second;
+    const std::chrono::microseconds silence = timestamp - conv_intervals.back().end;
+    const std::chrono::microseconds threshold = (std::chrono::microseconds) ((unsigned long) COMM_INTERVAL_THRESHOLD);
+
+    if (silence > threshold) {
+        // the pause was too long: this packet opens a new interval
+        commInterval next_interval = {timestamp, timestamp, 1};
+        conv_intervals.push_back(next_interval);
+    }
+    else {
+        // the packet still belongs to the latest interval: extend it
+        commInterval &latest = conv_intervals.back();
+        latest.end = timestamp;
+        latest.pkts_count++;
+    }
+}
+
+/**
+ * Condenses the collected communication intervals of every (stateless) conversation into one
+ * entry of comm_interval_statistics:
+ * - the average number of packets per interval, and
+ * - the average time between the end of one interval and the start of the next one, in seconds
+ *   (0 if the conversation has only a single interval).
+ * Conversations without any interval are skipped.
+ */
+void statistics::createCommIntervalStats(){
+    for (const auto &conv_and_intervals : comm_intervals) {
+        const conv &conversation = conv_and_intervals.first;
+        const std::vector<commInterval> &intervals = conv_and_intervals.second;
+        const std::size_t interval_count = intervals.size();
+
+        if (interval_count == 0)
+            continue;
+
+        // with a single interval there is no gap to measure, so the time between intervals is 0
+        if (interval_count == 1) {
+            entry_commIntervalStat single = {(double) intervals[0].pkts_count, (double) 0};
+            comm_interval_statistics[conversation] = single;
+            continue;
+        }
+
+        // several intervals: sum up the packet counts and the gaps between consecutive intervals
+        long total_pkts = intervals[0].pkts_count;
+        std::chrono::microseconds total_gap = (std::chrono::microseconds) 0;
+        for (std::size_t idx = 1; idx < interval_count; ++idx) {
+            total_pkts += intervals[idx].pkts_count;
+            total_gap += intervals[idx].start - intervals[idx - 1].end;
+        }
+
+        double avg_pkts_count = total_pkts / ((double) interval_count);
+        // n intervals have n - 1 gaps; dividing by 1e6 converts microseconds to seconds
+        double avg_time_betw_ints = (total_gap.count() / (double) (interval_count - 1)) / (double) 1e6;
+        entry_commIntervalStat aggregated = {avg_pkts_count, avg_time_betw_ints};
+        comm_interval_statistics[conversation] = aggregated;
+    }
+}
+
 /**
  * Increments the packet counter for the given IP address and MSS value.
  * @param ipAddress The IP address whose MSS packet counter should be incremented.
@@ -601,7 +713,9 @@ void statistics::writeToDatabase(std::string database_path) {
         db.writeStatisticsToS(tos_distribution);
         db.writeStatisticsWin(win_distribution);
         db.writeStatisticsConv(conv_statistics);
+        db.writeStatisticsConvStateless(conv_statistics_stateless);
         db.writeStatisticsInterval(interval_statistics);
+        db.writeCommIntervalStats(comm_interval_statistics);
     }
     else {
         // Tinslib failed to recognize the types of the packets in the input PCAP
@@ -609,42 +723,3 @@ void statistics::writeToDatabase(std::string database_path) {
         return;
     }
 }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

+ 55 - 0
code_boost/src/cxx/statistics.h

@@ -15,6 +15,8 @@
 
 using namespace Tins;
 
+#define COMM_INTERVAL_THRESHOLD 10e6  // in microseconds; i.e. here 10s
+
 /*
  * Definition of structs used in unordered_map fields
  */
@@ -263,6 +265,40 @@ struct ipAddress_inOut_port {
     }
 };
 
+/*
+ * A single communication interval of a conversation:
+ * - start: timestamp of the first packet in the interval
+ * - end: timestamp of the last packet in the interval
+ * - pkts_count: number of packets within the interval
+ */
+struct commInterval{
+    std::chrono::microseconds start;
+    std::chrono::microseconds end;
+    long pkts_count;
+
+    // Two intervals are equal if all three fields match.
+    bool operator==(const commInterval &rhs) const {
+        if (start != rhs.start)
+            return false;
+        if (end != rhs.end)
+            return false;
+        return pkts_count == rhs.pkts_count;
+    }
+};
+
+/*
+ * Aggregated communication interval statistics of one conversation:
+ * - avg_pkts_count: the average count of packets within an interval
+ * - avg_time_between: the average time between two consecutive intervals
+ */
+struct entry_commIntervalStat{
+    double avg_pkts_count;
+    double avg_time_between;
+
+    // Two entries are equal if both averages compare equal.
+    bool operator==(const entry_commIntervalStat &rhs) const {
+        if (avg_pkts_count != rhs.avg_pkts_count)
+            return false;
+        return avg_time_between == rhs.avg_time_between;
+    }
+};
+
+
 /*
  * Definition of hash functions for structs used as key in unordered_map
  */
@@ -373,6 +409,12 @@ public:
 
     void addConvStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
 
+    void addConvStatStateless(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
+
+    void addCommInterval(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
+
+    void createCommIntervalStats();
+
     std::vector<float> calculateIPsCumEntropy();
 
     std::vector<float> calculateLastIntervalIPsEntropy(std::chrono::microseconds intervalStartTimestamp);
@@ -468,6 +510,7 @@ private:
     int intervalCumNovelMSSCount = 0;
     int intervalCumNovelPortCount = 0;
 
+
     /*
      * Data containers
      */
@@ -487,6 +530,11 @@ private:
     // average of inter-arrival times}
     std::unordered_map<conv, entry_convStat> conv_statistics;
 
+    // {IP Address A, Port A, IP Address B, Port B,   #packets, packets timestamps, inter-arrival times,
+    // average of inter-arrival times}
+    // Also stores conversation with only one exchanged message. In this case avgPktRate, minDelay, maxDelay and avgDelay are -1
+    std::unordered_map<conv, entry_convStat> conv_statistics_stateless;
+
     // {Last timestamp in the interval, #packets, #bytes, source IP entropy, destination IP entropy,
     // source IP cumulative entropy, destination IP cumulative entropy, #payload, #incorrect TCP checksum,
     // #correct TCP checksum, #novel IP, #novel TTL, #novel Window Size, #novel ToS,#novel MSS}
@@ -518,6 +566,13 @@ private:
 
     // {IP Address, MAC Address}
     std::unordered_map<std::string, std::string> ip_mac_mapping;
+
+    // {IP Address A, Port A, IP Address B, Port B, listof(commInterval)}
+    // Used to manage all communication intervals for a pair of communicating hosts
+    std::unordered_map<conv, std::vector<commInterval> > comm_intervals;
+
+    // {IP Address A, Port A, IP Address B, Port B, avg #packets, avg time between intervals}
+    std::unordered_map<conv, entry_commIntervalStat> comm_interval_statistics;
 };
 
 

+ 132 - 5
code_boost/src/cxx/statistics_db.cpp

@@ -27,13 +27,13 @@ void statistics_db::writeStatisticsIP(std::unordered_map<std::string, entry_ipSt
         SQLite::Transaction transaction(*db);
         const char *createTable = "CREATE TABLE ip_statistics ( "
                 "ipAddress TEXT, "
-                "pktsReceived INTEGER, "
+                "pktsReceived INTEGtimestampER, "
                 "pktsSent INTEGER, "
                 "kbytesReceived REAL, "
                 "kbytesSent REAL, "
                 "maxPktRate REAL,"
                 "minPktRate REAL,"
-                "ipClass TEXT COLLATE NOCASE, "
+                "ipClass TEXT, "
                 "PRIMARY KEY(ipAddress));";
         db->exec(createTable);
         SQLite::Statement query(*db, "INSERT INTO ip_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?)");
@@ -187,7 +187,7 @@ void statistics_db::writeStatisticsProtocols(std::unordered_map<ipAddress_protoc
         SQLite::Transaction transaction(*db);
         const char *createTable = "CREATE TABLE ip_protocols ("
                 "ipAddress TEXT,"
-                "protocolName TEXT COLLATE NOCASE,"
+                "protocolName TEXT,"
                 "protocolCount INTEGER,"
                 "PRIMARY KEY(ipAddress,protocolName));";
         db->exec(createTable);
@@ -217,7 +217,7 @@ void statistics_db::writeStatisticsPorts(std::unordered_map<ipAddress_inOut_port
         SQLite::Transaction transaction(*db);
         const char *createTable = "CREATE TABLE ip_ports ("
                 "ipAddress TEXT,"
-                "portDirection TEXT COLLATE NOCASE,"
+                "portDirection TEXT,"
                 "portNumber INTEGER,"
                 "portCount INTEGER,"
                 "PRIMARY KEY(ipAddress,portDirection,portNumber));";
@@ -249,7 +249,7 @@ void statistics_db::writeStatisticsIpMac(std::unordered_map<std::string, std::st
         SQLite::Transaction transaction(*db);
         const char *createTable = "CREATE TABLE ip_mac ("
                 "ipAddress TEXT,"
-                "macAddress TEXT COLLATE NOCASE,"
+                "macAddress TEXT,"
                 "PRIMARY KEY(ipAddress));";
         db->exec(createTable);
         SQLite::Statement query(*db, "INSERT INTO ip_mac VALUES (?, ?)");
@@ -380,6 +380,91 @@ void statistics_db::writeStatisticsConv(std::unordered_map<conv, entry_convStat>
     }
 }
 
+
+/**
+ * Writes the stateless conversation statistics into the table conv_statistics_stateless,
+ * which is dropped and recreated on every call.
+ * One row is written per conversation with at least one packet. For conversations with exactly
+ * one packet, avgPktRate, avgDelay, minDelay and maxDelay are stored as -1. Otherwise the delays
+ * are derived from the recorded inter-arrival times and the packet rate (packets per second)
+ * from the time span between the first and the last recorded timestamp.
+ * Exceptions are caught and reported on stdout.
+ * @param convStatistics The stateless conversation from class statistics.
+ */
+void statistics_db::writeStatisticsConvStateless(std::unordered_map<conv, entry_convStat> convStatistics){
+    try {
+        db->exec("DROP TABLE IF EXISTS conv_statistics_stateless");
+        SQLite::Transaction transaction(*db);
+        const char *createTable = "CREATE TABLE conv_statistics_stateless ("
+                "ipAddressA TEXT,"
+                "portA INTEGER,"
+                "ipAddressB TEXT,"
+                "portB INTEGER,"
+                "pktsCount INTEGER,"
+                "avgPktRate REAL,"
+                "avgDelay INTEGER,"
+                "minDelay INTEGER,"
+                "maxDelay INTEGER,"
+                "PRIMARY KEY(ipAddressA,portA,ipAddressB,portB));";
+        db->exec(createTable);
+        SQLite::Statement query(*db, "INSERT INTO conv_statistics_stateless VALUES (?, ?, ?, ?, ?,  ?, ?, ?, ?)");
+
+        for (const auto &record : convStatistics) {
+            const conv &endpoints = record.first;
+            entry_convStat stat = record.second;  // local copy, the averages are filled in below
+
+            // conversations without packets are not stored
+            if (!(stat.pkts_count > 0))
+                continue;
+
+            query.bind(1, endpoints.ipAddressA);
+            query.bind(2, endpoints.portA);
+            query.bind(3, endpoints.ipAddressB);
+            query.bind(4, endpoints.portB);
+
+            // -1 marks "not available" and is what gets stored for single-packet conversations
+            int minDelay = -1;
+            int maxDelay = -1;
+
+            if (stat.pkts_count == 1) {
+                stat.avg_pkt_rate = (float) -1;
+                stat.avg_interarrival_time = (std::chrono::microseconds) -1;
+            }
+            else {
+                // sum, minimum and maximum of the recorded inter-arrival times
+                int sumDelay = 0;
+                for (const auto &gap : stat.interarrival_time) {
+                    const auto delay = gap.count();
+                    sumDelay += delay;
+                    if (maxDelay < delay)
+                        maxDelay = delay;
+                    if (minDelay == -1 || minDelay > delay)
+                        minDelay = delay;
+                }
+
+                if (stat.interarrival_time.size() > 0)
+                    stat.avg_interarrival_time = (std::chrono::microseconds) sumDelay / stat.interarrival_time.size(); // average
+                else
+                    stat.avg_interarrival_time = (std::chrono::microseconds) 0;
+
+                // packet rate over the whole lifetime of the conversation
+                std::chrono::microseconds conn_duration = stat.pkts_timestamp.back() - stat.pkts_timestamp[0];
+                stat.avg_pkt_rate = (float) stat.pkts_count * 1000000 / conn_duration.count(); // pkt per sec
+            }
+
+            query.bind(5, (int) stat.pkts_count);
+            query.bind(6, (float) stat.avg_pkt_rate);
+            query.bind(7, (int) stat.avg_interarrival_time.count());
+            query.bind(8, minDelay);
+            query.bind(9, maxDelay);
+            query.exec();
+            query.reset();
+        }
+        transaction.commit();
+    }
+    catch (std::exception &e) {
+        std::cout << "Exception in statistics_db: " << e.what() << std::endl;
+    }
+}
+
+
 /**
  * Writes the interval statistics into the database.
  * @param intervalStatistics The interval entries from class statistics.
@@ -438,3 +523,45 @@ void statistics_db::writeStatisticsInterval(std::unordered_map<std::string, entr
     }
 }
 
+/**
+ * Writes the aggregated communication interval statistics into the table
+ * comm_interval_statistics, which is dropped and recreated on every call.
+ * One row is written per conversation whose average packet count is greater than 0.
+ * Exceptions are caught and reported on stdout.
+ * @param commIntervalStatistics The communication interval statistics from class statistics.
+ */
+void statistics_db::writeCommIntervalStats(std::unordered_map<conv, entry_commIntervalStat> commIntervalStatistics){
+    try {
+        db->exec("DROP TABLE IF EXISTS comm_interval_statistics");
+        SQLite::Transaction transaction(*db);
+        const char *createTable = "CREATE TABLE comm_interval_statistics ("
+                "ipAddressA TEXT,"
+                "portA INTEGER,"
+                "ipAddressB TEXT,"
+                "portB INTEGER,"
+                "avgPktCount REAL,"
+                "avgTimeBetweenIntervals REAL,"
+                "PRIMARY KEY(ipAddressA,portA,ipAddressB,portB));";
+        db->exec(createTable);
+        SQLite::Statement query(*db, "INSERT INTO comm_interval_statistics VALUES (?, ?, ?, ?, ?, ?)");
+
+        // store one row per conversation and its interval aggregation
+        for (const auto &record : commIntervalStatistics) {
+            const conv &endpoints = record.first;
+            const entry_commIntervalStat &aggregate = record.second;
+
+            // skip entries that do not carry a positive average packet count
+            if (!(aggregate.avg_pkts_count > 0))
+                continue;
+
+            query.bind(1, endpoints.ipAddressA);
+            query.bind(2, endpoints.portA);
+            query.bind(3, endpoints.ipAddressB);
+            query.bind(4, endpoints.portB);
+            query.bind(5, aggregate.avg_pkts_count);
+            query.bind(6, aggregate.avg_time_between);
+
+            query.exec();
+            query.reset();
+        }
+        transaction.commit();
+    }
+    catch (std::exception &e) {
+        std::cout << "Exception in statistics_db: " << e.what() << std::endl;
+    }
+}

+ 4 - 0
code_boost/src/cxx/statistics_db.h

@@ -43,8 +43,12 @@ public:
 
     void writeStatisticsConv(std::unordered_map<conv, entry_convStat> convStatistics);
 
+    void writeStatisticsConvStateless(std::unordered_map<conv, entry_convStat> convStatistics);
+
     void writeStatisticsInterval(std::unordered_map<std::string, entry_intervalStat> intervalStatistics);
 
+    void writeCommIntervalStats(std::unordered_map<conv, entry_commIntervalStat> commIntervalStatistics);
+
 private:
     // Pointer to the SQLite database
     std::unique_ptr<SQLite::Database> db;