Browse Source

Added Overall Degrees and deleted debug-prints

Marcel Juschak 6 years ago
parent
commit
65bdb7ee97
1 changed files with 97 additions and 2 deletions
  1. 97 2
      code/ID2TLib/Statistics.py

+ 97 - 2
code/ID2TLib/Statistics.py

@@ -577,6 +577,46 @@ class Statistics:
 
         return out_degree
 
+    def get_overall_degree(self):
+        """
+        determines the overall-degree for each ipAddress, i.e. for every IP the sum of the degrees reported for it by get_out_degree and get_in_degree (an IP found in only one of the two lists keeps that single value)
+        :return: a list, each entry consists of one IPAddress and its associated overall-degree
+        """
+
+        out_degrees = self.get_out_degree()
+        in_degrees = self.get_in_degree()
+        overall_degrees = []
+        processed = {} # maps an IP address to True once the out_degrees loop below has appended an entry for it to overall_degrees
+
+        # pre-register every IP of in_degrees as not processed, so the final loop can look each one up without a KeyError
+        # IPs of out_degrees are registered inside the main loop, so no additional loop is needed for them
+        for inD in in_degrees:
+            processed[inD[0]] = False
+
+        for outD in out_degrees:
+            ip_out = outD[0]
+            processed[ip_out] = False
+
+            # IPs that appear in both lists: append one summed entry per in_degrees entry with the same IP
+            for inD in in_degrees:
+                ip_in = inD[0]
+                if ip_out == ip_in:
+                    # same IPAddress -> append sum of out-degree (outD[1]) and in-degree (inD[1])
+                    overall_degrees.append((ip_out, outD[1] + inD[1]))
+                    processed[ip_out] = True
+
+            if not processed[ip_out]:
+                # IP only appears in out_degrees -> append its entry unchanged
+                overall_degrees.append(outD)
+                processed[outD[0]] = True
+        
+        # add remaining IPs, which did not appear in out_degrees, with their in_degrees entry unchanged
+        for inD in in_degrees:
+            if not processed[inD[0]]:
+                overall_degrees.append(inD)
+
+        return overall_degrees
+
     def filter_multiples(self, entries):
         """
         helper function, for get_out_degree and get_in_degree
@@ -597,7 +637,6 @@ class Statistics:
                     filtered_entries.append((p1[0], p1[1] + p2[1]))
                     done.append(p1)
                     done.append(p2)
-                    print("duplicate found:", p1, " and ", p2)
                     added = True
                     break
 
@@ -1142,6 +1181,61 @@ class Statistics:
             else:
                 print("Error: No statistics Information for plotting out-degrees found")
 
+        def plot_overall_degree(file_ending: str):
+            """
+            Creates a horizontal bar plot visualizing the overall-degree for every IP Address and saves it next to the pcap file
+
+            :param file_ending: The file extension for the output file containing the plot, including the dot, e.g. ".pdf"
+            :return: A filepath to the file containing the created plot, or None if no overall-degree data is available
+            """
+
+            plt.gcf().clear()
+
+            # retrieve data; each entry is indexed as (IP address, overall degree)
+            overall_degree = self.get_overall_degree()
+
+            if overall_degree:
+                graphx, graphy = [], []
+                for entry in overall_degree:
+                    # degree values
+                    graphx.append(entry[1])
+                    # IP labels
+                    graphy.append(entry[0])
+
+                # create the scaled figure first: plt.figure() opens a new current figure, so title/labels set before it would not end up in the saved plot
+                plt.figure(figsize=(int(len(graphx))/20 + 5, int(len(graphy)/5) + 5))  # these proportions just worked well
+
+                # set labels
+                plt.title("Overalldegree per IP Address")
+                plt.ylabel('IpAddress')
+                plt.xlabel('Overalldegree')
+
+                # set width of the bars
+                width = 0.3
+
+                # set limits of the axis
+                plt.ylim([0, len(graphy)])
+                plt.xlim([0, max(graphx) + 10])
+
+                # display numbers at each bar
+                for i, v in enumerate(graphx):
+                    plt.text(v + 1, i + .1, str(v), color='blue', fontweight='bold')
+
+                # display grid for better visuals
+                plt.grid(True)
+
+                # plot the bars at positions 0..n-1 and use the IP addresses as tick labels
+                labels = graphy
+                graphy = list(range(len(graphx)))
+                plt.barh(graphy, graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
+                plt.yticks(graphy, labels)
+                out = self.pcap_filepath.replace('.pcap', '_overall_degree' + file_ending)
+                plt.tight_layout()
+                plt.savefig(out, dpi=500)
+                return out
+            else:
+                print("Error: No statistics Information for plotting overall-degrees found")
+
         def plot_big_comm_interval_stat(attr:str, table:str, title:str, xlabel:str, suffix:str):
             """
             Plots the desired statistc per connection as horizontal bar plot. 
@@ -1253,7 +1347,7 @@ class Statistics:
             # plot data and return outpath
             return plot_big_comm_interval_stat("avgTimeBetweenIntervals", "comm_interval_statistics", title, 'Average time between intervals', suffix)
 
-
+        
         ttl_out_path = plot_ttl('.' + format)
         mss_out_path = plot_mss('.' + format)
         win_out_path = plot_win('.' + format)
@@ -1272,6 +1366,7 @@ class Statistics:
         plot_packets_per_connection_out = plot_packets_per_connection('.' + format)
         plot_out_degree = plot_out_degree('.' + format)
         plot_in_degree = plot_in_degree('.' + format)
+        plot_overall_degree = plot_overall_degree('.' + format)
         plot_avg_pkts_per_comm_interval_out = plot_avg_pkts_per_comm_interval('.' + format)
         plot_avg_time_between_comm_interval_out = plot_avg_time_between_comm_interval('.' + format)