|
@@ -10,7 +10,6 @@ matplotlib.use('Agg')
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.pyplot as plt
|
|
from ID2TLib.PcapFile import PcapFile
|
|
from ID2TLib.PcapFile import PcapFile
|
|
from ID2TLib.StatsDatabase import StatsDatabase
|
|
from ID2TLib.StatsDatabase import StatsDatabase
|
|
-from ID2TLib.IPv4 import IPAddress
|
|
|
|
|
|
|
|
|
|
|
|
class Statistics:
|
|
class Statistics:
|
|
@@ -500,6 +499,12 @@ class Statistics:
|
|
result_dict = {key: value for (key, value) in result}
|
|
result_dict = {key: value for (key, value) in result}
|
|
return result_dict
|
|
return result_dict
|
|
|
|
|
|
|
|
+ def get_ip_address_count(self):
|
|
|
|
+ return self.process_db_query("SELECT COUNT(*) FROM ip_statistics")
|
|
|
|
+
|
|
|
|
+ def get_ip_addresses(self):
|
|
|
|
+ return self.process_db_query("SELECT ipAddress FROM ip_statistics")
|
|
|
|
+
|
|
def get_random_ip_address(self, count: int = 1):
|
|
def get_random_ip_address(self, count: int = 1):
|
|
"""
|
|
"""
|
|
:param count: The number of IP addreses to return
|
|
:param count: The number of IP addreses to return
|
|
@@ -514,6 +519,13 @@ class Statistics:
|
|
ip_address_list.append(self.process_db_query("random(all(ipAddress))"))
|
|
ip_address_list.append(self.process_db_query("random(all(ipAddress))"))
|
|
return ip_address_list
|
|
return ip_address_list
|
|
|
|
|
|
|
|
+ def get_ip_address_from_mac(self, macAddress: str):
|
|
|
|
+ """
|
|
|
|
+ :param macAddress: the MAC address of which the IP shall be returned, if existing in DB
|
|
|
|
+ :return: the IP address used in the dataset by a given MAC address
|
|
|
|
+ """
|
|
|
|
+ return self.process_db_query('ipAddress(macAddress=' + macAddress + ")")
|
|
|
|
+
|
|
def get_mac_address(self, ipAddress: str):
|
|
def get_mac_address(self, ipAddress: str):
|
|
"""
|
|
"""
|
|
:return: The MAC address used in the dataset for the given IP address.
|
|
:return: The MAC address used in the dataset for the given IP address.
|
|
@@ -545,154 +557,6 @@ class Statistics:
|
|
else:
|
|
else:
|
|
return None
|
|
return None
|
|
|
|
|
|
- def get_in_degree(self):
|
|
|
|
- """
|
|
|
|
- determines the in-degree for each local ipAddress, i.e. for every IP the count of ipAddresses it has received packets from
|
|
|
|
- :return: a list, each entry consists of one local IPAddress and its associated in-degree
|
|
|
|
- """
|
|
|
|
-
|
|
|
|
- in_degree_raw = self.stats_db._process_user_defined_query(
|
|
|
|
- "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'in\' AND portNumber = portA GROUP BY ipAddress " +
|
|
|
|
- "UNION " +
|
|
|
|
- "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'in\' AND portNumber = portB GROUP BY ipAddress")
|
|
|
|
-
|
|
|
|
- #Because of the structure of the database, there could be 2 entries for the same IP Address, therefore accumulate their sums
|
|
|
|
- in_degree = self.filter_multiples(in_degree_raw)
|
|
|
|
-
|
|
|
|
- return in_degree
|
|
|
|
-
|
|
|
|
- def get_out_degree(self):
|
|
|
|
- """
|
|
|
|
- determines the out-degree for each local ipAddress, i.e. for every IP the count of ipAddresses it has sent packets to
|
|
|
|
- :return: a list, each entry consists of one local IPAddress and its associated out-degree
|
|
|
|
- """
|
|
|
|
- """
|
|
|
|
-
|
|
|
|
- test = self.stats_db._process_user_defined_query("SELECT DISTINCT * FROM conv_statistics")
|
|
|
|
- #test2 = self.stats_db._process_user_defined_query("SELECT DISTINCT ipAddressB, portB FROM conv_statistics")
|
|
|
|
- print("############# conv_statistics IP's + Ports")
|
|
|
|
- for p in test:
|
|
|
|
- print(p)
|
|
|
|
- #for p in test2:
|
|
|
|
- # print(p)
|
|
|
|
-
|
|
|
|
- print("############## ip_ports ##################")
|
|
|
|
- test3 = self.stats_db._process_user_defined_query("SELECT DISTINCT ipAddress, portNumber, portDirection FROM ip_ports")
|
|
|
|
- for p in test3:
|
|
|
|
- print(p)
|
|
|
|
-
|
|
|
|
- print("")
|
|
|
|
- print("############## AFTER JOIN - A #############")
|
|
|
|
- test4 = self.stats_db._process_user_defined_query(
|
|
|
|
- "SELECT * FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA") # Hier werden die anfang locals rausgefiltert!
|
|
|
|
- for p in test4:
|
|
|
|
- print(p)
|
|
|
|
-
|
|
|
|
- print("")
|
|
|
|
- print("############## AFTER JOIN - B #############")
|
|
|
|
- test6 = self.stats_db._process_user_defined_query(
|
|
|
|
- "SELECT * FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB") # Hier werden die anfang locals rausgefiltert!
|
|
|
|
- for p in test6:
|
|
|
|
- print(p)
|
|
|
|
-
|
|
|
|
- print("")
|
|
|
|
- print("############## BUILD UP PART FOR PART#############")
|
|
|
|
- test5 = self.stats_db._process_user_defined_query(
|
|
|
|
- "SELECT ipAddress, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' GROUP BY ipAddress")
|
|
|
|
- for p in test5:
|
|
|
|
- print(p)
|
|
|
|
- """
|
|
|
|
- out_degree_raw = self.stats_db._process_user_defined_query(
|
|
|
|
- "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA GROUP BY ipAddress " +
|
|
|
|
- "UNION " +
|
|
|
|
- "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB GROUP BY ipAddress")
|
|
|
|
-
|
|
|
|
- #filter out non-local IPs
|
|
|
|
- #out_degree_raw_2 = []
|
|
|
|
- #for entry in out_degree_raw:
|
|
|
|
- # if IPAddress.parse(entry[0]).is_reserved():
|
|
|
|
- # out_degree_raw_2.append(entry)
|
|
|
|
-
|
|
|
|
- #Because of the structure of the database, there could be 2 entries for the same IP Address, therefore accumulate their sums
|
|
|
|
- out_degree = self.filter_multiples(out_degree_raw)
|
|
|
|
-
|
|
|
|
- return out_degree
|
|
|
|
-
|
|
|
|
- def get_avg_delay_local_ext(self):
|
|
|
|
- """
|
|
|
|
- Calculates the average delay of a packet for external and local communication, based on the tcp handshakes
|
|
|
|
- :return: tuple consisting of avg delay for local and external communication, (local, external)
|
|
|
|
- """
|
|
|
|
-
|
|
|
|
- conv_delays = self.stats_db._process_user_defined_query("SELECT ipAddressA, ipAddressB, avgDelay FROM conv_statistics")
|
|
|
|
- if(conv_delays):
|
|
|
|
- external_conv = []
|
|
|
|
- local_conv = []
|
|
|
|
-
|
|
|
|
- for conv in conv_delays:
|
|
|
|
- IPA = IPAddress.parse(conv[0])
|
|
|
|
- IPB = IPAddress.parse(conv[1])
|
|
|
|
-
|
|
|
|
- #split into local and external conversations
|
|
|
|
- if(not IPA.is_private() or not IPB.is_private()):
|
|
|
|
- external_conv.append(conv)
|
|
|
|
- else:
|
|
|
|
- local_conv.append(conv)
|
|
|
|
-
|
|
|
|
- # calculate avg local and external delay by summing up the respective delays and dividing them by the number of conversations
|
|
|
|
- avg_delay_external = 0.0
|
|
|
|
- avg_delay_local = 0.0
|
|
|
|
-
|
|
|
|
- if(local_conv):
|
|
|
|
- for conv in local_conv:
|
|
|
|
- avg_delay_local += conv[2]
|
|
|
|
- avg_delay_local = (avg_delay_local/len(local_conv)) * 0.001 #ms
|
|
|
|
- else:
|
|
|
|
- # no local conversations in statistics found
|
|
|
|
- avg_delay_local = 0.06
|
|
|
|
-
|
|
|
|
- if(external_conv):
|
|
|
|
- for conv in external_conv:
|
|
|
|
- avg_delay_external += conv[2]
|
|
|
|
- avg_delay_external = (avg_delay_external/len(external_conv)) * 0.001 #ms
|
|
|
|
- else:
|
|
|
|
- # no external conversations in statistics found
|
|
|
|
- avg_delay_external = 0.15
|
|
|
|
- else:
|
|
|
|
- #if no statistics were found, use these numbers
|
|
|
|
- avg_delay_external = 0.15
|
|
|
|
- avg_delay_local = 0.06
|
|
|
|
- return avg_delay_local, avg_delay_external
|
|
|
|
-
|
|
|
|
- def filter_multiples(self, entries):
|
|
|
|
- """
|
|
|
|
- helper function, for get_out_degree and get_in_degree
|
|
|
|
- filters the given list for duplicate IpAddresses and, if duplciates are present, accumulates their values
|
|
|
|
-
|
|
|
|
- :param entries: list, each entry consists of an ipAddress and a numeric value
|
|
|
|
- :return: a filtered list, without duplicate ipAddresses
|
|
|
|
- """
|
|
|
|
-
|
|
|
|
- filtered_entries = []
|
|
|
|
- done = []
|
|
|
|
- for p1 in entries:
|
|
|
|
- added = False
|
|
|
|
- if p1 in done:
|
|
|
|
- continue
|
|
|
|
- for p2 in entries:
|
|
|
|
- if p1[0] == p2[0] and p1 != p2:
|
|
|
|
- filtered_entries.append((p1[0], p1[1] + p2[1]))
|
|
|
|
- done.append(p1)
|
|
|
|
- done.append(p2)
|
|
|
|
- #entries.remove(p2)
|
|
|
|
- added = True
|
|
|
|
- break
|
|
|
|
-
|
|
|
|
- if not added:
|
|
|
|
- filtered_entries.append(p1)
|
|
|
|
-
|
|
|
|
- return filtered_entries
|
|
|
|
-
|
|
|
|
|
|
|
|
def get_statistics_database(self):
|
|
def get_statistics_database(self):
|
|
"""
|
|
"""
|
|
@@ -1074,190 +938,6 @@ class Statistics:
|
|
plt.savefig(out, dpi=500)
|
|
plt.savefig(out, dpi=500)
|
|
return out
|
|
return out
|
|
|
|
|
|
- def plot_packets_per_connection(file_ending: str):
|
|
|
|
- """
|
|
|
|
- Plots the exchanged packets per connection as horizontal bar plot.
|
|
|
|
- Included are 'half-open' connections, where only one packet is exchanged.
|
|
|
|
- Note: there may be cutoff problems within the plot if there is to little data.
|
|
|
|
-
|
|
|
|
- :param file_ending: The file extension for the output file containing the plot
|
|
|
|
- :return: A filepath to the file containing the created plot
|
|
|
|
- """
|
|
|
|
- plt.gcf().clear()
|
|
|
|
- result = self.stats_db._process_user_defined_query(
|
|
|
|
- "SELECT ipAddressA, portA, ipAddressB, portB, pktsCount FROM conv_statistics_stateless")
|
|
|
|
-
|
|
|
|
- if (result):
|
|
|
|
- graphy, graphx = [], []
|
|
|
|
- # plot data in descending order
|
|
|
|
- result = sorted(result, key=lambda row: row[4])
|
|
|
|
-
|
|
|
|
- # compute plot data
|
|
|
|
- for i, row in enumerate(result):
|
|
|
|
- addr1, addr2 = "%s:%d" % (row[0], row[1]), "%s:%d" % (row[2], row[3])
|
|
|
|
- # adjust the justification of strings to improve appearance
|
|
|
|
- len_max = max(len(addr1), len(addr2))
|
|
|
|
- addr1 = addr1.ljust(len_max)
|
|
|
|
- addr2 = addr2.ljust(len_max)
|
|
|
|
- # add plot data
|
|
|
|
- graphy.append("%s\n%s" % (addr1, addr2))
|
|
|
|
- graphx.append(row[4])
|
|
|
|
-
|
|
|
|
- # compute plot height in inches
|
|
|
|
- dist_mult_height, dist_mult_width = 0.55, 0.07 # these values turned out to work well
|
|
|
|
- plt_height, plt_width = len(graphy) * dist_mult_height, max(graphx) * dist_mult_width
|
|
|
|
- title_distance = 1 + 0.012*52.8/plt_height # orginally, a good title distance turned out to be 1.012 with a plot height of 52.8
|
|
|
|
-
|
|
|
|
- # have x axis and its label appear at the top (instead of bottom)
|
|
|
|
- fig, ax = plt.subplots()
|
|
|
|
- ax.xaxis.tick_top()
|
|
|
|
- ax.xaxis.set_label_position("top")
|
|
|
|
-
|
|
|
|
- # set additional plot parameters
|
|
|
|
- plt.title("Sent packets per connection", y=title_distance)
|
|
|
|
- plt.xlabel('Number of Packets')
|
|
|
|
- plt.ylabel('Connection')
|
|
|
|
- width = 0.5
|
|
|
|
- plt.grid(True)
|
|
|
|
- plt.gca().margins(y=0) # removes the space between data and x-axis within the plot
|
|
|
|
- plt.gcf().set_size_inches(plt_width, plt_height) # set plot size
|
|
|
|
-
|
|
|
|
- # plot the above data, first use plain numbers as graphy to maintain sorting
|
|
|
|
- plt.barh(range(len(graphy)), graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
|
|
|
|
- # now change the y numbers to the respective address labels
|
|
|
|
- plt.yticks(range(len(graphy)), graphy)
|
|
|
|
- # try to use tight layout to cut off unnecessary space
|
|
|
|
- try:
|
|
|
|
- plt.tight_layout(pad=4)
|
|
|
|
- except ValueError:
|
|
|
|
- pass
|
|
|
|
-
|
|
|
|
- # save created figure
|
|
|
|
- out = self.pcap_filepath.replace('.pcap', '_plot-PktCount per Connection Distribution' + file_ending)
|
|
|
|
- plt.savefig(out, dpi=500)
|
|
|
|
- return out
|
|
|
|
- else:
|
|
|
|
- print("Error plot protocol: No protocol values found!")
|
|
|
|
-
|
|
|
|
- def plot_out_degree(file_ending: str):
|
|
|
|
- plt.gcf().clear()
|
|
|
|
- out_degree = self.get_out_degree()
|
|
|
|
- #print("")
|
|
|
|
- #print("#############in plot_out_degree###########")
|
|
|
|
- #print(out_degree)
|
|
|
|
-
|
|
|
|
- graphx, graphy = [], []
|
|
|
|
- for entry in out_degree:
|
|
|
|
- graphx.append(entry[0])
|
|
|
|
- graphy.append(entry[1])
|
|
|
|
- plt.autoscale(enable=True, axis='both')
|
|
|
|
- plt.title("Outdegree")
|
|
|
|
- plt.xlabel('IpAddress')
|
|
|
|
- plt.ylabel('Outdegree')
|
|
|
|
- width = 0.1
|
|
|
|
- plt.xlim([0, len(graphx)])
|
|
|
|
- plt.grid(True)
|
|
|
|
-
|
|
|
|
- x = range(0,len(graphx))
|
|
|
|
- my_xticks = graphx
|
|
|
|
- plt.xticks(x, my_xticks)
|
|
|
|
-
|
|
|
|
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
|
|
|
|
- out = self.pcap_filepath.replace('.pcap', '_out_degree' + file_ending)
|
|
|
|
- plt.savefig(out,dpi=500)
|
|
|
|
- return out
|
|
|
|
-
|
|
|
|
- def plot_in_degree(file_ending: str):
|
|
|
|
- plt.gcf().clear()
|
|
|
|
- in_degree = self.get_in_degree()
|
|
|
|
-
|
|
|
|
- graphx, graphy = [], []
|
|
|
|
- for entry in in_degree:
|
|
|
|
- graphx.append(entry[0])
|
|
|
|
- graphy.append(entry[1])
|
|
|
|
- plt.autoscale(enable=True, axis='both')
|
|
|
|
- plt.title("Indegree")
|
|
|
|
- plt.xlabel('IpAddress')
|
|
|
|
- plt.ylabel('Indegree')
|
|
|
|
- width = 0.1
|
|
|
|
- plt.xlim([0, len(graphx)])
|
|
|
|
- plt.grid(True)
|
|
|
|
-
|
|
|
|
- x = range(0,len(graphx))
|
|
|
|
- my_xticks = graphx
|
|
|
|
- plt.xticks(x, my_xticks)
|
|
|
|
-
|
|
|
|
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
|
|
|
|
- out = self.pcap_filepath.replace('.pcap', '_in_degree' + file_ending)
|
|
|
|
- plt.savefig(out,dpi=500)
|
|
|
|
- return out
|
|
|
|
-
|
|
|
|
- def plot_avgpkts_per_comm_interval(file_ending: str):
|
|
|
|
- """
|
|
|
|
- Plots the exchanged packets per connection as horizontal bar plot.
|
|
|
|
- Included are 'half-open' connections, where only one packet is exchanged.
|
|
|
|
- Note: there may be cutoff problems within the plot if there is to little data.
|
|
|
|
-
|
|
|
|
- :param file_ending: The file extension for the output file containing the plot
|
|
|
|
- :return: A filepath to the file containing the created plot
|
|
|
|
- """
|
|
|
|
- plt.gcf().clear()
|
|
|
|
- result = self.stats_db._process_user_defined_query(
|
|
|
|
- "SELECT ipAddressA, portA, ipAddressB, portB, avgPktCount FROM comm_interval_statistics")
|
|
|
|
-
|
|
|
|
- if (result):
|
|
|
|
- graphy, graphx = [], []
|
|
|
|
- # plot data in descending order
|
|
|
|
- result = sorted(result, key=lambda row: row[4])
|
|
|
|
-
|
|
|
|
- # compute plot data
|
|
|
|
- for i, row in enumerate(result):
|
|
|
|
- addr1, addr2 = "%s:%d" % (row[0], row[1]), "%s:%d" % (row[2], row[3])
|
|
|
|
- # adjust the justification of strings to improve appearance
|
|
|
|
- len_max = max(len(addr1), len(addr2))
|
|
|
|
- addr1 = addr1.ljust(len_max)
|
|
|
|
- addr2 = addr2.ljust(len_max)
|
|
|
|
- # add plot data
|
|
|
|
- graphy.append("%s\n%s" % (addr1, addr2))
|
|
|
|
- graphx.append(row[4])
|
|
|
|
-
|
|
|
|
- # compute plot height in inches
|
|
|
|
- dist_mult_height, dist_mult_width = 0.55, 0.07 # these values turned out to work well
|
|
|
|
- plt_height, plt_width = len(graphy) * dist_mult_height, max(graphx) * dist_mult_width
|
|
|
|
- title_distance = 1 + 0.012*52.8/plt_height # orginally, a good title distance turned out to be 1.012 with a plot height of 52.8
|
|
|
|
-
|
|
|
|
- # have x axis and its label appear at the top (instead of bottom)
|
|
|
|
- fig, ax = plt.subplots()
|
|
|
|
- ax.xaxis.tick_top()
|
|
|
|
- ax.xaxis.set_label_position("top")
|
|
|
|
-
|
|
|
|
- # set additional plot parameters
|
|
|
|
- plt.title("Average number of packets per communication interval", y=title_distance)
|
|
|
|
- plt.xlabel('Number of Packets')
|
|
|
|
- plt.ylabel('Connection')
|
|
|
|
- width = 0.5
|
|
|
|
- plt.grid(True)
|
|
|
|
- plt.gca().margins(y=0) # removes the space between data and x-axis within the plot
|
|
|
|
- plt.gcf().set_size_inches(plt_width, plt_height) # set plot size
|
|
|
|
-
|
|
|
|
- # plot the above data, first use plain numbers as graphy to maintain sorting
|
|
|
|
- plt.barh(range(len(graphy)), graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
|
|
|
|
- # now change the y numbers to the respective address labels
|
|
|
|
- plt.yticks(range(len(graphy)), graphy)
|
|
|
|
- # try to use tight layout to cut off unnecessary space
|
|
|
|
- try:
|
|
|
|
- plt.tight_layout(pad=4)
|
|
|
|
- except ValueError:
|
|
|
|
- pass
|
|
|
|
-
|
|
|
|
- # save created figure
|
|
|
|
- out = self.pcap_filepath.replace('.pcap', '_plot-Avg PktCount Communication Interval Distribution' + file_ending)
|
|
|
|
- plt.savefig(out, dpi=500)
|
|
|
|
- return out
|
|
|
|
- else:
|
|
|
|
- print("Error plot protocol: No protocol values found!")
|
|
|
|
-
|
|
|
|
-
|
|
|
|
ttl_out_path = plot_ttl('.' + format)
|
|
ttl_out_path = plot_ttl('.' + format)
|
|
mss_out_path = plot_mss('.' + format)
|
|
mss_out_path = plot_mss('.' + format)
|
|
win_out_path = plot_win('.' + format)
|
|
win_out_path = plot_win('.' + format)
|
|
@@ -1273,10 +953,6 @@ class Statistics:
|
|
plot_interval_new_tos = plot_interval_new_tos('.' + format)
|
|
plot_interval_new_tos = plot_interval_new_tos('.' + format)
|
|
plot_interval_new_win_size = plot_interval_new_win_size('.' + format)
|
|
plot_interval_new_win_size = plot_interval_new_win_size('.' + format)
|
|
plot_interval_new_mss = plot_interval_new_mss('.' + format)
|
|
plot_interval_new_mss = plot_interval_new_mss('.' + format)
|
|
- plot_packets_per_connection_out = plot_packets_per_connection('.' + format)
|
|
|
|
- plot_out_degree = plot_out_degree('.' + format)
|
|
|
|
- plot_in_degree = plot_in_degree('.' + format)
|
|
|
|
- plot_avgpkts_per_comm_interval_out = plot_avgpkts_per_comm_interval('.' + format)
|
|
|
|
|
|
|
|
## Time consuming plot
|
|
## Time consuming plot
|
|
# port_out_path = plot_port('.' + format)
|
|
# port_out_path = plot_port('.' + format)
|
|
@@ -1285,4 +961,3 @@ class Statistics:
|
|
# ip_dst_out_path = plot_ip_dst('.' + format)
|
|
# ip_dst_out_path = plot_ip_dst('.' + format)
|
|
|
|
|
|
print("Saved plots in the input PCAP directory.")
|
|
print("Saved plots in the input PCAP directory.")
|
|
- print("In-/Out-/Overall-degree plots not fully finished yet")
|
|
|