Parcourir la source

Added In-/Out-Degree plots (not complete yet)

Added the plotting for In-/Out-Degree Statistics. There are still things to change, but the core functionality is done.
Marcel Juschak il y a 6 ans
Parent
commit
d8ccdb5c9c
1 fichiers modifiés avec 159 ajouts et 0 suppressions
  1. 159 0
      code/ID2TLib/Statistics.py

+ 159 - 0
code/ID2TLib/Statistics.py

@@ -10,6 +10,7 @@ matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 from ID2TLib.PcapFile import PcapFile
 from ID2TLib.StatsDatabase import StatsDatabase
+from ID2TLib.IPv4 import IPAddress
 
 
 class Statistics:
@@ -544,6 +545,108 @@ class Statistics:
         else:
             return None
 
+    def get_in_degree(self):
+        """
+        Determines the in-degree of IP addresses, i.e. for every IP address the count of IP addresses it has
+        received packets from.
+
+        An address can occur in conv_statistics either as ipAddressA or as ipAddressB. Both roles are queried
+        and the two partial counts are added up per address.
+
+        :return: a list, each entry consists of one IP address and its associated in-degree
+        """
+
+        # UNION ALL keeps both partial rows of an address even if their counts are equal. A plain UNION removes
+        # duplicate rows, so two identical (address, count) rows would collapse into one and half of the sum
+        # would be lost. The outer SELECT adds the partial counts up per address.
+        in_degree_raw = self.stats_db._process_user_defined_query(
+                "SELECT ip, SUM(degree) FROM (" +
+                "SELECT ipAddressA AS ip, Count(DISTINCT ipAddressB) AS degree FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection='in' AND portNumber = portA GROUP BY ipAddress " +
+                "UNION ALL " +
+                "SELECT ipAddressB AS ip, Count(DISTINCT ipAddressA) AS degree FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection='in' AND portNumber = portB GROUP BY ipAddress" +
+                ") GROUP BY ip")
+
+        # The query already returns one row per address; filter_multiples is kept as a safeguard only.
+        in_degree = self.filter_multiples(in_degree_raw)
+
+        return in_degree
+
+    def get_out_degree(self):
+        """
+        Determines the out-degree of IP addresses, i.e. for every IP address the count of IP addresses it has
+        sent packets to.
+
+        An address can occur in conv_statistics either as ipAddressA or as ipAddressB. Both roles are queried
+        and the two partial counts are added up per address.
+
+        :return: a list, each entry consists of one IP address and its associated out-degree
+        """
+
+        # UNION ALL keeps both partial rows of an address even if their counts are equal. A plain UNION removes
+        # duplicate rows, so two identical (address, count) rows would collapse into one and half of the sum
+        # would be lost. The outer SELECT adds the partial counts up per address.
+        out_degree_raw = self.stats_db._process_user_defined_query(
+                "SELECT ip, SUM(degree) FROM (" +
+                "SELECT ipAddressA AS ip, Count(DISTINCT ipAddressB) AS degree FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection='out' AND portNumber = portA GROUP BY ipAddress " +
+                "UNION ALL " +
+                "SELECT ipAddressB AS ip, Count(DISTINCT ipAddressA) AS degree FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection='out' AND portNumber = portB GROUP BY ipAddress" +
+                ") GROUP BY ip")
+
+        # TODO: the result is not restricted to local addresses yet; an earlier draft filtered the rows with
+        # IPAddress.parse(entry[0]).is_reserved().
+
+        # The query already returns one row per address; filter_multiples is kept as a safeguard only.
+        out_degree = self.filter_multiples(out_degree_raw)
+
+        return out_degree
+
+    def filter_multiples(self, entries):
+        """
+        Helper function for get_out_degree and get_in_degree.
+        Merges all entries that share the same IP address into a single entry whose value is the sum of the
+        individual values. Any number of entries per address is handled, including entries that are equal.
+
+        :param entries: list, each entry consists of an ipAddress and a numeric value
+        :return: a list of (ipAddress, accumulated value) tuples without duplicate ipAddresses, ordered by the
+                 first occurrence of each ipAddress in entries
+        """
+
+        totals = {}
+        # remembers the first-seen order explicitly, so the result does not depend on dict ordering
+        order = []
+        for entry in entries:
+            ip_address, value = entry[0], entry[1]
+            if ip_address in totals:
+                totals[ip_address] += value
+            else:
+                totals[ip_address] = value
+                order.append(ip_address)
+
+        return [(ip_address, totals[ip_address]) for ip_address in order]
+
+
 
     def get_statistics_database(self):
         """
@@ -978,6 +1081,59 @@ class Statistics:
             else:
                 print("Error plot protocol: No protocol values found!")
 
+        def plot_out_degree(file_ending: str):
+            """
+            Draws a bar chart with one bar per entry returned by get_out_degree() and saves it to a file whose
+            path is derived from self.pcap_filepath.
+
+            :param file_ending: the file extension of the generated image, including the leading dot
+            :return: the path of the saved plot
+            """
+            plt.gcf().clear()
+            degrees = self.get_out_degree()
+
+            # split the (ipAddress, degree) entries into tick labels and bar heights
+            ip_addresses = [row[0] for row in degrees]
+            degree_values = [row[1] for row in degrees]
+            positions = range(0, len(ip_addresses))
+            bar_width = 0.1
+
+            plt.autoscale(enable=True, axis='both')
+            plt.title("Outdegree")
+            plt.xlabel('IpAddress')
+            plt.ylabel('Outdegree')
+            # NOTE(review): the x-range starts at 0 while the bars are centred on the integer positions, so the
+            # first bar may be partially clipped - confirm
+            plt.xlim([0, len(ip_addresses)])
+            plt.grid(True)
+            plt.xticks(positions, ip_addresses)
+
+            plt.bar(positions, degree_values, bar_width, align='center', linewidth=1, color='red', edgecolor='red')
+
+            target_path = self.pcap_filepath.replace('.pcap', '_out_degree' + file_ending)
+            plt.savefig(target_path, dpi=500)
+            return target_path
+
+
+        def plot_in_degree(file_ending: str):
+            """
+            Draws a bar chart with one bar per entry returned by get_in_degree() and saves it to a file whose
+            path is derived from self.pcap_filepath.
+
+            :param file_ending: the file extension of the generated image, including the leading dot
+            :return: the path of the saved plot
+            """
+            plt.gcf().clear()
+            degrees = self.get_in_degree()
+
+            # split the (ipAddress, degree) entries into tick labels and bar heights
+            ip_addresses = [row[0] for row in degrees]
+            degree_values = [row[1] for row in degrees]
+            positions = range(0, len(ip_addresses))
+            bar_width = 0.1
+
+            plt.autoscale(enable=True, axis='both')
+            plt.title("Indegree")
+            plt.xlabel('IpAddress')
+            plt.ylabel('Indegree')
+            # NOTE(review): the x-range starts at 0 while the bars are centred on the integer positions, so the
+            # first bar may be partially clipped - confirm
+            plt.xlim([0, len(ip_addresses)])
+            plt.grid(True)
+            plt.xticks(positions, ip_addresses)
+
+            plt.bar(positions, degree_values, bar_width, align='center', linewidth=1, color='red', edgecolor='red')
+
+            target_path = self.pcap_filepath.replace('.pcap', '_in_degree' + file_ending)
+            plt.savefig(target_path, dpi=500)
+            return target_path
+
 
         ttl_out_path = plot_ttl('.' + format)
         mss_out_path = plot_mss('.' + format)
@@ -995,6 +1151,8 @@ class Statistics:
         plot_interval_new_win_size = plot_interval_new_win_size('.' + format)
         plot_interval_new_mss = plot_interval_new_mss('.' + format)
         plot_packets_per_connection_out = plot_packets_per_connection('.' + format)
+        plot_out_degree = plot_out_degree('.' + format)
+        plot_in_degree = plot_in_degree('.' + format)
 
         ## Time consuming plot
         # port_out_path = plot_port('.' + format)
@@ -1003,3 +1161,4 @@ class Statistics:
         # ip_dst_out_path = plot_ip_dst('.' + format)
 
         print("Saved plots in the input PCAP directory.")
+        print("In-/Out-/Overall-degree plots not fully finished yet")