Browse Source

In- and Out-Degree Plots reworked

Marcel Juschak 6 years ago
parent
commit
ef813f7cd6
1 changed files with 131 additions and 123 deletions
  1. 131 123
      code/ID2TLib/Statistics.py

+ 131 - 123
code/ID2TLib/Statistics.py

@@ -547,14 +547,14 @@ class Statistics:
 
     def get_in_degree(self):
         """
-        determines the in-degree for each local ipAddress, i.e. for every IP the count of ipAddresses it has received packets from
-        :return: a list, each entry consists of one local IPAddress and its associated in-degree
+        determines the in-degree for each ipAddress, i.e. for every IP the count of ipAddresses it has received packets from
+        :return: a list, each entry consists of one IPAddress and its associated in-degree
         """
 
         in_degree_raw = self.stats_db._process_user_defined_query(
-                "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'in\' AND portNumber = portA GROUP BY ipAddress " +
+                "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics_stateless ON ipAddress = ipAddressA WHERE portDirection=\'in\' AND portNumber = portA GROUP BY ipAddress " +
                 "UNION " +
-                "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'in\' AND portNumber = portB GROUP BY ipAddress")
+                "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics_stateless ON ipAddress = ipAddressB WHERE portDirection=\'in\' AND portNumber = portB GROUP BY ipAddress")
 
         #Because of the structure of the database, there could be 2 entries for the same IP Address, therefore accumulate their sums
         in_degree = self.filter_multiples(in_degree_raw)
@@ -563,61 +563,49 @@ class Statistics:
 
     def get_out_degree(self):
         """
-        determines the out-degree for each local ipAddress, i.e. for every IP the count of ipAddresses it has sent packets to
-        :return: a list, each entry consists of one local IPAddress and its associated out-degree
-        """
-        """
-
-        test = self.stats_db._process_user_defined_query("SELECT DISTINCT * FROM conv_statistics")
-        #test2 = self.stats_db._process_user_defined_query("SELECT DISTINCT ipAddressB, portB FROM conv_statistics")
-        print("############# conv_statistics IP's + Ports")
-        for p in test:
-            print(p)
-        #for p in test2:
-        #    print(p)
-
-        print("############## ip_ports ##################")
-        test3 = self.stats_db._process_user_defined_query("SELECT DISTINCT ipAddress, portNumber, portDirection FROM ip_ports")
-        for p in test3:
-            print(p)
-
-        print("")
-        print("############## AFTER JOIN - A #############")
-        test4 = self.stats_db._process_user_defined_query(
-                "SELECT * FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA") # Hier werden die anfang locals rausgefiltert!
-        for p in test4:
-            print(p)
-
-        print("")
-        print("############## AFTER JOIN - B #############")
-        test6 = self.stats_db._process_user_defined_query(
-                "SELECT * FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB") # Hier werden die anfang locals rausgefiltert!
-        for p in test6:
-            print(p)
-
-        print("")
-        print("############## BUILD UP PART FOR PART#############")
-        test5 = self.stats_db._process_user_defined_query(
-                "SELECT ipAddress, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' GROUP BY ipAddress")
-        for p in test5:
-            print(p)
+        Determines the out-degree for each ipAddress, i.e. for every IP the count of ipAddresses it has sent packets to.
+
+        The value is derived from two queries that join ip_ports with conv_statistics_stateless, one for
+        each side of a conversation (ipAddressA/portA and ipAddressB/portB), both restricted to ports with
+        portDirection 'out'. The two result sets are combined with UNION and then merged per IP address
+        by filter_multiples.
+
+        :return: a list, each entry consists of one IPAddress and its associated out-degree
         """
+        
+        # NOTE(review): UNION discards identical rows, so if both halves yield the same (ip, count) pair
+        # only one of them reaches filter_multiples -- confirm this is intended (UNION ALL would keep both)
         out_degree_raw = self.stats_db._process_user_defined_query(
-                "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA GROUP BY ipAddress " +
+                "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics_stateless ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA GROUP BY ipAddress " +
                 "UNION " +
-                "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB GROUP BY ipAddress")
-
-        #filter out non-local IPs
-        #out_degree_raw_2 = []
-        #for entry in out_degree_raw:
-        #    if IPAddress.parse(entry[0]).is_reserved():
-        #        out_degree_raw_2.append(entry)
+                "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics_stateless ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB GROUP BY ipAddress")
 
         #Because of the structure of the database, there could be 2 entries for the same IP Address, therefore accumulate their sums
         out_degree = self.filter_multiples(out_degree_raw)
 
         return out_degree
 
+    def filter_multiples(self, entries):
+        """
+        helper function, for get_out_degree and get_in_degree
+        merges all entries that share an IpAddress by accumulating their values
+
+        Every occurrence of an address contributes to its sum, no matter how often the address
+        appears or whether two entries are completely identical. The result keeps the order in
+        which the addresses were first seen.
+
+        :param entries: list, each entry consists of an ipAddress and a numeric value
+        :return: a list of (ipAddress, accumulated value) tuples, without duplicate ipAddresses
+        """
+
+        totals = {}
+        # first-seen order is tracked explicitly so the result does not depend on dict ordering
+        order = []
+        for entry in entries:
+            ip_address, value = entry[0], entry[1]
+            if ip_address in totals:
+                totals[ip_address] += value
+            else:
+                totals[ip_address] = value
+                order.append(ip_address)
+
+        return [(ip_address, totals[ip_address]) for ip_address in order]
+
     def get_avg_delay_local_ext(self):
         """
         Calculates the average delay of a packet for external and local communication, based on the tcp handshakes
@@ -664,36 +652,6 @@ class Statistics:
             avg_delay_local = 0.06
         return avg_delay_local, avg_delay_external
 
-    def filter_multiples(self, entries):
-        """
-        helper function, for get_out_degree and get_in_degree
-        filters the given list for duplicate IpAddresses and, if duplciates are present, accumulates their values
-
-        :param entries: list, each entry consists of an ipAddress and a numeric value
-        :return: a filtered list, without duplicate ipAddresses
-        """
-
-        filtered_entries = []
-        done = []
-        for p1 in entries:
-            added = False
-            if p1 in done:
-                continue
-            for p2 in entries:
-                if p1[0] == p2[0] and p1 != p2:
-                    filtered_entries.append((p1[0], p1[1] + p2[1]))
-                    done.append(p1)
-                    done.append(p2)
-                    #entries.remove(p2)
-                    added = True
-                    break
-
-            if not added:
-                filtered_entries.append(p1)
-
-        return filtered_entries
-
-
     def get_statistics_database(self):
         """
         :return: A reference to the statistics database object
@@ -1139,58 +1097,109 @@ class Statistics:
             else:
                 print("Error plot protocol: No protocol values found!")
 
+        def plot_in_degree(file_ending: str):
+            """
+            Creates a horizontal bar plot, visualizing the in-degree for every IP Address
+
+            :param file_ending: The file extension for the output file containing the plot; it is appended
+                                verbatim to the file name, so presumably it includes the dot, e.g. ".pdf"
+            :return: A filepath to the file containing the created plot, or None if no in-degree data was found
+            """
+
+            plt.gcf().clear()
+
+            # retrieve data
+            in_degree = self.get_in_degree()
+
+            if not in_degree:
+                print("Error: No statistics Information for plotting in-degrees found")
+                return None
+
+            # degree values go on the x-axis, IP labels on the y-axis
+            graphx = [entry[1] for entry in in_degree]
+            graphy = [entry[0] for entry in in_degree]
+
+            # set scalings
+            # the figure has to be created BEFORE title and labels are set: plt.figure() opens a new
+            # current figure, so everything configured earlier would stay behind on the old one
+            plt.figure(figsize=(int(len(graphx))/20 + 5, int(len(graphy)/5) + 5))  # these proportions just worked well
+
+            # set labels
+            plt.title("Indegree per IP Address")
+            plt.ylabel('IpAddress')
+            plt.xlabel('Indegree')
+
+            #set width of the bars
+            width = 0.3
+
+            #set limits of the axis
+            plt.ylim([0, len(graphy)])
+            plt.xlim([0, max(graphx) + 10])
+
+            # display numbers at each bar
+            for i, v in enumerate(graphx):
+                plt.text(v + 1, i + .1, str(v), color='blue', fontweight='bold')
+
+            # display grid for better visuals
+            plt.grid(True)
+
+            # plot the bar
+            plt.barh(graphy, graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
+            out = self.pcap_filepath.replace('.pcap', '_in_degree' + file_ending)
+            plt.tight_layout()
+            plt.savefig(out,dpi=500)
+            return out
+
         def plot_out_degree(file_ending: str):
+            """
+            Creates a horizontal bar plot, visualizing the out-degree for every IP Address
+
+            :param file_ending: The file extension for the output file containing the plot; it is appended
+                                verbatim to the file name, so presumably it includes the dot, e.g. ".pdf"
+            :return: A filepath to the file containing the created plot, or None if no out-degree data was found
+            """
+
             plt.gcf().clear()
+
+            # retrieve data
             out_degree = self.get_out_degree()
-            #print("")
-            #print("#############in plot_out_degree###########")
-            #print(out_degree)
 
-            graphx, graphy = [], []
-            for entry in out_degree:
-                graphx.append(entry[0])
-                graphy.append(entry[1])
-            plt.autoscale(enable=True, axis='both')
-            plt.title("Outdegree")
-            plt.xlabel('IpAddress')
-            plt.ylabel('Outdegree')
-            width = 0.1
-            plt.xlim([0, len(graphx)])
-            plt.grid(True)
+            if(out_degree):
+                graphx, graphy = [], []
+                for entry in out_degree:
+                    # degree values
+                    graphx.append(entry[1])
+                    # IP labels
+                    graphy.append(entry[0])
 
-            x = range(0,len(graphx))
-            my_xticks = graphx
-            plt.xticks(x, my_xticks)
+                # set scalings
+                # the figure has to be created BEFORE title and labels are set: plt.figure() opens a new
+                # current figure, so everything configured earlier would stay behind on the old one
+                plt.figure(figsize=(int(len(graphx))/20 + 5, int(len(graphy)/5) + 5))  # these proportions just worked well
 
-            plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
-            out = self.pcap_filepath.replace('.pcap', '_out_degree' + file_ending)
-            plt.savefig(out,dpi=500)
-            return out
+                # set labels
+                plt.title("Outdegree per IP Address")
+                plt.ylabel('IpAddress')
+                plt.xlabel('Outdegree')
 
-        def plot_in_degree(file_ending: str):
-            plt.gcf().clear()
-            in_degree = self.get_in_degree()
+                #set width of the bars
+                width = 0.3
 
-            graphx, graphy = [], []
-            for entry in in_degree:
-                graphx.append(entry[0])
-                graphy.append(entry[1])
-            plt.autoscale(enable=True, axis='both')
-            plt.title("Indegree")
-            plt.xlabel('IpAddress')
-            plt.ylabel('Indegree')
-            width = 0.1
-            plt.xlim([0, len(graphx)])
-            plt.grid(True)
+                #set limits of the axis
+                plt.ylim([0, len(graphy)])
+                plt.xlim([0, max(graphx) + 10])
 
-            x = range(0,len(graphx))
-            my_xticks = graphx
-            plt.xticks(x, my_xticks)
+                # display numbers at each bar
+                for i, v in enumerate(graphx):
+                    plt.text(v + 1, i + .1, str(v), color='blue', fontweight='bold')
 
-            plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
-            out = self.pcap_filepath.replace('.pcap', '_in_degree' + file_ending)
-            plt.savefig(out,dpi=500)
-            return out
+                # display grid for better visuals
+                plt.grid(True)
+
+                # plot the bar
+                plt.barh(graphy, graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
+                out = self.pcap_filepath.replace('.pcap', '_out_degree' + file_ending)
+                plt.tight_layout()
+                plt.savefig(out,dpi=500)
+                return out
+            else:
+                print("Error: No statistics Information for plotting out-degrees found")
 
         def plot_avgpkts_per_comm_interval(file_ending: str):
             """
@@ -1285,4 +1294,3 @@ class Statistics:
         # ip_dst_out_path = plot_ip_dst('.' + format)
 
         print("Saved plots in the input PCAP directory.")
-        print("In-/Out-/Overall-degree plots not fully finished yet")