|
@@ -10,6 +10,7 @@ matplotlib.use('Agg')
|
|
|
import matplotlib.pyplot as plt
|
|
|
from ID2TLib.PcapFile import PcapFile
|
|
|
from ID2TLib.StatsDatabase import StatsDatabase
|
|
|
+from ID2TLib.IPv4 import IPAddress
|
|
|
|
|
|
|
|
|
class Statistics:
|
|
@@ -499,12 +500,6 @@ class Statistics:
|
|
|
result_dict = {key: value for (key, value) in result}
|
|
|
return result_dict
|
|
|
|
|
|
- def get_ip_address_count(self):
|
|
|
- return self.process_db_query("SELECT COUNT(*) FROM ip_statistics")
|
|
|
-
|
|
|
- def get_ip_addresses(self):
|
|
|
- return self.process_db_query("SELECT ipAddress FROM ip_statistics")
|
|
|
-
|
|
|
def get_random_ip_address(self, count: int = 1):
|
|
|
"""
|
|
|
:param count: The number of IP addreses to return
|
|
@@ -519,13 +514,6 @@ class Statistics:
|
|
|
ip_address_list.append(self.process_db_query("random(all(ipAddress))"))
|
|
|
return ip_address_list
|
|
|
|
|
|
- def get_ip_address_from_mac(self, macAddress: str):
|
|
|
- """
|
|
|
- :param macAddress: the MAC address of which the IP shall be returned, if existing in DB
|
|
|
- :return: the IP address used in the dataset by a given MAC address
|
|
|
- """
|
|
|
- return self.process_db_query('ipAddress(macAddress=' + macAddress + ")")
|
|
|
-
|
|
|
def get_mac_address(self, ipAddress: str):
|
|
|
"""
|
|
|
:return: The MAC address used in the dataset for the given IP address.
|
|
@@ -557,6 +545,154 @@ class Statistics:
|
|
|
else:
|
|
|
return None
|
|
|
|
|
|
+ def get_in_degree(self):
|
|
|
+ """
|
|
|
+ determines the in-degree for each local ipAddress, i.e. for every IP the count of ipAddresses it has received packets from
|
|
|
+ :return: a list, each entry consists of one local IPAddress and its associated in-degree
|
|
|
+ """
|
|
|
+
|
|
|
+ in_degree_raw = self.stats_db._process_user_defined_query(
|
|
|
+ "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'in\' AND portNumber = portA GROUP BY ipAddress " +
|
|
|
+ "UNION " +
|
|
|
+ "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'in\' AND portNumber = portB GROUP BY ipAddress")
|
|
|
+
|
|
|
+ #Because of the structure of the database, there could be 2 entries for the same IP Address, therefore accumulate their sums
|
|
|
+ in_degree = self.filter_multiples(in_degree_raw)
|
|
|
+
|
|
|
+ return in_degree
|
|
|
+
|
|
|
+ def get_out_degree(self):
|
|
|
+ """
|
|
|
+ determines the out-degree for each local ipAddress, i.e. for every IP the count of ipAddresses it has sent packets to
|
|
|
+ :return: a list, each entry consists of one local IPAddress and its associated out-degree
|
|
|
+ """
|
|
|
+ """
|
|
|
+
|
|
|
+ test = self.stats_db._process_user_defined_query("SELECT DISTINCT * FROM conv_statistics")
|
|
|
+ #test2 = self.stats_db._process_user_defined_query("SELECT DISTINCT ipAddressB, portB FROM conv_statistics")
|
|
|
+ print("############# conv_statistics IP's + Ports")
|
|
|
+ for p in test:
|
|
|
+ print(p)
|
|
|
+ #for p in test2:
|
|
|
+ # print(p)
|
|
|
+
|
|
|
+ print("############## ip_ports ##################")
|
|
|
+ test3 = self.stats_db._process_user_defined_query("SELECT DISTINCT ipAddress, portNumber, portDirection FROM ip_ports")
|
|
|
+ for p in test3:
|
|
|
+ print(p)
|
|
|
+
|
|
|
+ print("")
|
|
|
+ print("############## AFTER JOIN - A #############")
|
|
|
+ test4 = self.stats_db._process_user_defined_query(
|
|
|
+ "SELECT * FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA") # Hier werden die anfang locals rausgefiltert!
|
|
|
+ for p in test4:
|
|
|
+ print(p)
|
|
|
+
|
|
|
+ print("")
|
|
|
+ print("############## AFTER JOIN - B #############")
|
|
|
+ test6 = self.stats_db._process_user_defined_query(
|
|
|
+ "SELECT * FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB") # Hier werden die anfang locals rausgefiltert!
|
|
|
+ for p in test6:
|
|
|
+ print(p)
|
|
|
+
|
|
|
+ print("")
|
|
|
+ print("############## BUILD UP PART FOR PART#############")
|
|
|
+ test5 = self.stats_db._process_user_defined_query(
|
|
|
+ "SELECT ipAddress, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' GROUP BY ipAddress")
|
|
|
+ for p in test5:
|
|
|
+ print(p)
|
|
|
+ """
|
|
|
+ out_degree_raw = self.stats_db._process_user_defined_query(
|
|
|
+ "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA GROUP BY ipAddress " +
|
|
|
+ "UNION " +
|
|
|
+ "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB GROUP BY ipAddress")
|
|
|
+
|
|
|
+ #filter out non-local IPs
|
|
|
+ #out_degree_raw_2 = []
|
|
|
+ #for entry in out_degree_raw:
|
|
|
+ # if IPAddress.parse(entry[0]).is_reserved():
|
|
|
+ # out_degree_raw_2.append(entry)
|
|
|
+
|
|
|
+ #Because of the structure of the database, there could be 2 entries for the same IP Address, therefore accumulate their sums
|
|
|
+ out_degree = self.filter_multiples(out_degree_raw)
|
|
|
+
|
|
|
+ return out_degree
|
|
|
+
|
|
|
+ def get_avg_delay_local_ext(self):
|
|
|
+ """
|
|
|
+ Calculates the average delay of a packet for external and local communication, based on the tcp handshakes
|
|
|
+ :return: tuple consisting of avg delay for local and external communication, (local, external)
|
|
|
+ """
|
|
|
+
|
|
|
+ conv_delays = self.stats_db._process_user_defined_query("SELECT ipAddressA, ipAddressB, avgDelay FROM conv_statistics")
|
|
|
+ if(conv_delays):
|
|
|
+ external_conv = []
|
|
|
+ local_conv = []
|
|
|
+
|
|
|
+ for conv in conv_delays:
|
|
|
+ IPA = IPAddress.parse(conv[0])
|
|
|
+ IPB = IPAddress.parse(conv[1])
|
|
|
+
|
|
|
+ #split into local and external conversations
|
|
|
+ if(not IPA.is_private() or not IPB.is_private()):
|
|
|
+ external_conv.append(conv)
|
|
|
+ else:
|
|
|
+ local_conv.append(conv)
|
|
|
+
|
|
|
+ # calculate avg local and external delay by summing up the respective delays and dividing them by the number of conversations
|
|
|
+ avg_delay_external = 0.0
|
|
|
+ avg_delay_local = 0.0
|
|
|
+
|
|
|
+ if(local_conv):
|
|
|
+ for conv in local_conv:
|
|
|
+ avg_delay_local += conv[2]
|
|
|
+ avg_delay_local = (avg_delay_local/len(local_conv)) * 0.001 #ms
|
|
|
+ else:
|
|
|
+ # no local conversations in statistics found
|
|
|
+ avg_delay_local = 0.06
|
|
|
+
|
|
|
+ if(external_conv):
|
|
|
+ for conv in external_conv:
|
|
|
+ avg_delay_external += conv[2]
|
|
|
+ avg_delay_external = (avg_delay_external/len(external_conv)) * 0.001 #ms
|
|
|
+ else:
|
|
|
+ # no external conversations in statistics found
|
|
|
+ avg_delay_external = 0.15
|
|
|
+ else:
|
|
|
+ #if no statistics were found, use these numbers
|
|
|
+ avg_delay_external = 0.15
|
|
|
+ avg_delay_local = 0.06
|
|
|
+ return avg_delay_local, avg_delay_external
|
|
|
+
|
|
|
+ def filter_multiples(self, entries):
|
|
|
+ """
|
|
|
+ helper function, for get_out_degree and get_in_degree
|
|
|
+ filters the given list for duplicate IpAddresses and, if duplicates are present, accumulates their values
|
|
|
+
|
|
|
+ :param entries: list, each entry consists of an ipAddress and a numeric value
|
|
|
+ :return: a filtered list, without duplicate ipAddresses
|
|
|
+ """
|
|
|
+
|
|
|
+ filtered_entries = []
|
|
|
+ done = []
|
|
|
+ for p1 in entries:
|
|
|
+ added = False
|
|
|
+ if p1 in done:
|
|
|
+ continue
|
|
|
+ for p2 in entries:
|
|
|
+ if p1[0] == p2[0] and p1 != p2:
|
|
|
+ filtered_entries.append((p1[0], p1[1] + p2[1]))
|
|
|
+ done.append(p1)
|
|
|
+ done.append(p2)
|
|
|
+ #entries.remove(p2)
|
|
|
+ added = True
|
|
|
+ break
|
|
|
+
|
|
|
+ if not added:
|
|
|
+ filtered_entries.append(p1)
|
|
|
+
|
|
|
+ return filtered_entries
|
|
|
+
|
|
|
|
|
|
def get_statistics_database(self):
|
|
|
"""
|
|
@@ -938,6 +1074,190 @@ class Statistics:
|
|
|
plt.savefig(out, dpi=500)
|
|
|
return out
|
|
|
|
|
|
+ def plot_packets_per_connection(file_ending: str):
|
|
|
+ """
|
|
|
+ Plots the exchanged packets per connection as horizontal bar plot.
|
|
|
+ Included are 'half-open' connections, where only one packet is exchanged.
|
|
|
+ Note: there may be cutoff problems within the plot if there is too little data.
|
|
|
+
|
|
|
+ :param file_ending: The file extension for the output file containing the plot
|
|
|
+ :return: A filepath to the file containing the created plot
|
|
|
+ """
|
|
|
+ plt.gcf().clear()
|
|
|
+ result = self.stats_db._process_user_defined_query(
|
|
|
+ "SELECT ipAddressA, portA, ipAddressB, portB, pktsCount FROM conv_statistics_stateless")
|
|
|
+
|
|
|
+ if (result):
|
|
|
+ graphy, graphx = [], []
|
|
|
+ # plot data in descending order
|
|
|
+ result = sorted(result, key=lambda row: row[4])
|
|
|
+
|
|
|
+ # compute plot data
|
|
|
+ for i, row in enumerate(result):
|
|
|
+ addr1, addr2 = "%s:%d" % (row[0], row[1]), "%s:%d" % (row[2], row[3])
|
|
|
+ # adjust the justification of strings to improve appearance
|
|
|
+ len_max = max(len(addr1), len(addr2))
|
|
|
+ addr1 = addr1.ljust(len_max)
|
|
|
+ addr2 = addr2.ljust(len_max)
|
|
|
+ # add plot data
|
|
|
+ graphy.append("%s\n%s" % (addr1, addr2))
|
|
|
+ graphx.append(row[4])
|
|
|
+
|
|
|
+ # compute plot height in inches
|
|
|
+ dist_mult_height, dist_mult_width = 0.55, 0.07 # these values turned out to work well
|
|
|
+ plt_height, plt_width = len(graphy) * dist_mult_height, max(graphx) * dist_mult_width
|
|
|
+ title_distance = 1 + 0.012*52.8/plt_height # originally, a good title distance turned out to be 1.012 with a plot height of 52.8
|
|
|
+
|
|
|
+ # have x axis and its label appear at the top (instead of bottom)
|
|
|
+ fig, ax = plt.subplots()
|
|
|
+ ax.xaxis.tick_top()
|
|
|
+ ax.xaxis.set_label_position("top")
|
|
|
+
|
|
|
+ # set additional plot parameters
|
|
|
+ plt.title("Sent packets per connection", y=title_distance)
|
|
|
+ plt.xlabel('Number of Packets')
|
|
|
+ plt.ylabel('Connection')
|
|
|
+ width = 0.5
|
|
|
+ plt.grid(True)
|
|
|
+ plt.gca().margins(y=0) # removes the space between data and x-axis within the plot
|
|
|
+ plt.gcf().set_size_inches(plt_width, plt_height) # set plot size
|
|
|
+
|
|
|
+ # plot the above data, first use plain numbers as graphy to maintain sorting
|
|
|
+ plt.barh(range(len(graphy)), graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
|
|
|
+ # now change the y numbers to the respective address labels
|
|
|
+ plt.yticks(range(len(graphy)), graphy)
|
|
|
+ # try to use tight layout to cut off unnecessary space
|
|
|
+ try:
|
|
|
+ plt.tight_layout(pad=4)
|
|
|
+ except ValueError:
|
|
|
+ pass
|
|
|
+
|
|
|
+ # save created figure
|
|
|
+ out = self.pcap_filepath.replace('.pcap', '_plot-PktCount per Connection Distribution' + file_ending)
|
|
|
+ plt.savefig(out, dpi=500)
|
|
|
+ return out
|
|
|
+ else:
|
|
|
+ print("Error plot protocol: No protocol values found!")
|
|
|
+
|
|
|
+ def plot_out_degree(file_ending: str):
|
|
|
+ plt.gcf().clear()
|
|
|
+ out_degree = self.get_out_degree()
|
|
|
+ #print("")
|
|
|
+ #print("#############in plot_out_degree###########")
|
|
|
+ #print(out_degree)
|
|
|
+
|
|
|
+ graphx, graphy = [], []
|
|
|
+ for entry in out_degree:
|
|
|
+ graphx.append(entry[0])
|
|
|
+ graphy.append(entry[1])
|
|
|
+ plt.autoscale(enable=True, axis='both')
|
|
|
+ plt.title("Outdegree")
|
|
|
+ plt.xlabel('IpAddress')
|
|
|
+ plt.ylabel('Outdegree')
|
|
|
+ width = 0.1
|
|
|
+ plt.xlim([0, len(graphx)])
|
|
|
+ plt.grid(True)
|
|
|
+
|
|
|
+ x = range(0,len(graphx))
|
|
|
+ my_xticks = graphx
|
|
|
+ plt.xticks(x, my_xticks)
|
|
|
+
|
|
|
+ plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
|
|
|
+ out = self.pcap_filepath.replace('.pcap', '_out_degree' + file_ending)
|
|
|
+ plt.savefig(out,dpi=500)
|
|
|
+ return out
|
|
|
+
|
|
|
+ def plot_in_degree(file_ending: str):
|
|
|
+ plt.gcf().clear()
|
|
|
+ in_degree = self.get_in_degree()
|
|
|
+
|
|
|
+ graphx, graphy = [], []
|
|
|
+ for entry in in_degree:
|
|
|
+ graphx.append(entry[0])
|
|
|
+ graphy.append(entry[1])
|
|
|
+ plt.autoscale(enable=True, axis='both')
|
|
|
+ plt.title("Indegree")
|
|
|
+ plt.xlabel('IpAddress')
|
|
|
+ plt.ylabel('Indegree')
|
|
|
+ width = 0.1
|
|
|
+ plt.xlim([0, len(graphx)])
|
|
|
+ plt.grid(True)
|
|
|
+
|
|
|
+ x = range(0,len(graphx))
|
|
|
+ my_xticks = graphx
|
|
|
+ plt.xticks(x, my_xticks)
|
|
|
+
|
|
|
+ plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
|
|
|
+ out = self.pcap_filepath.replace('.pcap', '_in_degree' + file_ending)
|
|
|
+ plt.savefig(out,dpi=500)
|
|
|
+ return out
|
|
|
+
|
|
|
+ def plot_avgpkts_per_comm_interval(file_ending: str):
|
|
|
+ """
|
|
|
+ Plots the average number of packets per communication interval as horizontal bar plot.
|
|
|
+ Included are 'half-open' connections, where only one packet is exchanged.
|
|
|
+ Note: there may be cutoff problems within the plot if there is too little data.
|
|
|
+
|
|
|
+ :param file_ending: The file extension for the output file containing the plot
|
|
|
+ :return: A filepath to the file containing the created plot
|
|
|
+ """
|
|
|
+ plt.gcf().clear()
|
|
|
+ result = self.stats_db._process_user_defined_query(
|
|
|
+ "SELECT ipAddressA, portA, ipAddressB, portB, avgPktCount FROM comm_interval_statistics")
|
|
|
+
|
|
|
+ if (result):
|
|
|
+ graphy, graphx = [], []
|
|
|
+ # plot data in descending order
|
|
|
+ result = sorted(result, key=lambda row: row[4])
|
|
|
+
|
|
|
+ # compute plot data
|
|
|
+ for i, row in enumerate(result):
|
|
|
+ addr1, addr2 = "%s:%d" % (row[0], row[1]), "%s:%d" % (row[2], row[3])
|
|
|
+ # adjust the justification of strings to improve appearance
|
|
|
+ len_max = max(len(addr1), len(addr2))
|
|
|
+ addr1 = addr1.ljust(len_max)
|
|
|
+ addr2 = addr2.ljust(len_max)
|
|
|
+ # add plot data
|
|
|
+ graphy.append("%s\n%s" % (addr1, addr2))
|
|
|
+ graphx.append(row[4])
|
|
|
+
|
|
|
+ # compute plot height in inches
|
|
|
+ dist_mult_height, dist_mult_width = 0.55, 0.07 # these values turned out to work well
|
|
|
+ plt_height, plt_width = len(graphy) * dist_mult_height, max(graphx) * dist_mult_width
|
|
|
+ title_distance = 1 + 0.012*52.8/plt_height # originally, a good title distance turned out to be 1.012 with a plot height of 52.8
|
|
|
+
|
|
|
+ # have x axis and its label appear at the top (instead of bottom)
|
|
|
+ fig, ax = plt.subplots()
|
|
|
+ ax.xaxis.tick_top()
|
|
|
+ ax.xaxis.set_label_position("top")
|
|
|
+
|
|
|
+ # set additional plot parameters
|
|
|
+ plt.title("Average number of packets per communication interval", y=title_distance)
|
|
|
+ plt.xlabel('Number of Packets')
|
|
|
+ plt.ylabel('Connection')
|
|
|
+ width = 0.5
|
|
|
+ plt.grid(True)
|
|
|
+ plt.gca().margins(y=0) # removes the space between data and x-axis within the plot
|
|
|
+ plt.gcf().set_size_inches(plt_width, plt_height) # set plot size
|
|
|
+
|
|
|
+ # plot the above data, first use plain numbers as graphy to maintain sorting
|
|
|
+ plt.barh(range(len(graphy)), graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
|
|
|
+ # now change the y numbers to the respective address labels
|
|
|
+ plt.yticks(range(len(graphy)), graphy)
|
|
|
+ # try to use tight layout to cut off unnecessary space
|
|
|
+ try:
|
|
|
+ plt.tight_layout(pad=4)
|
|
|
+ except ValueError:
|
|
|
+ pass
|
|
|
+
|
|
|
+ # save created figure
|
|
|
+ out = self.pcap_filepath.replace('.pcap', '_plot-Avg PktCount Communication Interval Distribution' + file_ending)
|
|
|
+ plt.savefig(out, dpi=500)
|
|
|
+ return out
|
|
|
+ else:
|
|
|
+ print("Error plot protocol: No protocol values found!")
|
|
|
+
|
|
|
+
|
|
|
ttl_out_path = plot_ttl('.' + format)
|
|
|
mss_out_path = plot_mss('.' + format)
|
|
|
win_out_path = plot_win('.' + format)
|
|
@@ -953,6 +1273,10 @@ class Statistics:
|
|
|
plot_interval_new_tos = plot_interval_new_tos('.' + format)
|
|
|
plot_interval_new_win_size = plot_interval_new_win_size('.' + format)
|
|
|
plot_interval_new_mss = plot_interval_new_mss('.' + format)
|
|
|
+ plot_packets_per_connection_out = plot_packets_per_connection('.' + format)
|
|
|
+ plot_out_degree = plot_out_degree('.' + format)
|
|
|
+ plot_in_degree = plot_in_degree('.' + format)
|
|
|
+ plot_avgpkts_per_comm_interval_out = plot_avgpkts_per_comm_interval('.' + format)
|
|
|
|
|
|
## Time consuming plot
|
|
|
# port_out_path = plot_port('.' + format)
|
|
@@ -961,3 +1285,4 @@ class Statistics:
|
|
|
# ip_dst_out_path = plot_ip_dst('.' + format)
|
|
|
|
|
|
print("Saved plots in the input PCAP directory.")
|
|
|
+ print("In-/Out-/Overall-degree plots not fully finished yet")
|