浏览代码

Merge branch 'develop' of https://git.tk.informatik.tu-darmstadt.de/leon.boeck/ID2T-toolkit-BotnetTraffic into develop

christof 6 年之前
父节点
当前提交
a478031694
共有 1 个文件被更改,包括 214 次插入0 次删除
  1. 214 0
      code/ID2TLib/Statistics.py

+ 214 - 0
code/ID2TLib/Statistics.py

@@ -10,6 +10,7 @@ matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 from ID2TLib.PcapFile import PcapFile
 from ID2TLib.StatsDatabase import StatsDatabase
+from ID2TLib.IPv4 import IPAddress
 
 
 class Statistics:
@@ -544,6 +545,108 @@ class Statistics:
         else:
             return None
 
+    def get_in_degree(self):
+        """
+        determines the in-degree for each local ipAddress, i.e. for every IP the count of ipAddresses it has received packets from
+        :return: a list, each entry consists of one local IPAddress and its associated in-degree
+        """
+
+        in_degree_raw = self.stats_db._process_user_defined_query(
+                "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'in\' AND portNumber = portA GROUP BY ipAddress " +
+                "UNION " +
+                "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'in\' AND portNumber = portB GROUP BY ipAddress")
+        
+        #Because of the structure of the database, there could be 2 entries for the same IP Address, therefore accumulate their sums
+        in_degree = self.filter_multiples(in_degree_raw)
+
+        return in_degree
+
+    def get_out_degree(self):
+        """
+        determines the out-degree for each local ipAddress, i.e. for every IP the count of ipAddresses it has sent packets to
+        :return: a list, each entry consists of one local IPAddress and its associated out-degree
+        """
+        """
+
+        test = self.stats_db._process_user_defined_query("SELECT DISTINCT * FROM conv_statistics")
+        #test2 = self.stats_db._process_user_defined_query("SELECT DISTINCT ipAddressB, portB FROM conv_statistics")
+        print("############# conv_statistics IP's + Ports")
+        for p in test:
+            print(p)
+        #for p in test2:
+        #    print(p)
+
+        print("############## ip_ports ##################")
+        test3 = self.stats_db._process_user_defined_query("SELECT DISTINCT ipAddress, portNumber, portDirection FROM ip_ports")
+        for p in test3:
+            print(p)
+
+        print("")
+        print("############## AFTER JOIN - A #############")
+        test4 = self.stats_db._process_user_defined_query(
+                "SELECT * FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA") # Hier werden die anfang locals rausgefiltert!
+        for p in test4:
+            print(p)
+
+        print("")
+        print("############## AFTER JOIN - B #############")
+        test6 = self.stats_db._process_user_defined_query(
+                "SELECT * FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB") # Hier werden die anfang locals rausgefiltert!
+        for p in test6:
+            print(p)
+
+        print("")
+        print("############## BUILD UP PART FOR PART#############")
+        test5 = self.stats_db._process_user_defined_query(
+                "SELECT ipAddress, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' GROUP BY ipAddress")
+        for p in test5:
+            print(p)
+        """
+        out_degree_raw = self.stats_db._process_user_defined_query(
+                "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA GROUP BY ipAddress " +
+                "UNION " +
+                "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB GROUP BY ipAddress")
+
+        #filter out non-local IPs
+        #out_degree_raw_2 = []
+        #for entry in out_degree_raw:
+        #    if IPAddress.parse(entry[0]).is_reserved():
+        #        out_degree_raw_2.append(entry)
+
+        #Because of the structure of the database, there could be 2 entries for the same IP Address, therefore accumulate their sums
+        out_degree = self.filter_multiples(out_degree_raw)
+
+        return out_degree
+
+    def filter_multiples(self, entries):
+        """
+        helper function, for get_out_degree and get_in_degree
+        filters the given list for duplicate IpAddresses and, if duplciates are present, accumulates their values
+
+        :param entries: list, each entry consists of an ipAddress and a numeric value
+        :return: a filtered list, without duplicate ipAddresses
+        """
+
+        filtered_entries = []
+        done = []
+        for p1 in entries:       
+            added = False
+            if p1 in done:
+                continue
+            for p2 in entries:
+                if p1[0] == p2[0] and p1 != p2:
+                    filtered_entries.append((p1[0], p1[1] + p2[1]))
+                    done.append(p1)
+                    done.append(p2)
+                    #entries.remove(p2)
+                    added = True
+                    break
+
+            if not added:
+                filtered_entries.append(p1)
+
+        return filtered_entries
+
 
     def get_statistics_database(self):
         """
@@ -925,6 +1028,113 @@ class Statistics:
                 plt.savefig(out, dpi=500)
                 return out
 
+        def plot_packets_per_connection(file_ending: str):
+            plt.gcf().clear()
+            result = self.stats_db._process_user_defined_query(
+                "SELECT ipAddressA, portA, ipAddressB, portB, pktsCount FROM conv_statistics")
+            if (result):
+                graphy, graphx = [], []
+                # plot data in descending order
+                result = sorted(result, key=lambda row: row[4])
+
+                # compute plot data
+                for i, row in enumerate(result):
+                    addr1, addr2 = "%s:%d" % (row[0], row[1]), "%s:%d" % (row[2], row[3])
+                    # adjust the justification of strings to improve appearance
+                    len_max = max(len(addr1), len(addr2))
+                    addr1 = addr1.ljust(len_max)
+                    addr2 = addr2.ljust(len_max)
+                    # add plot data
+                    graphy.append("%s\n%s" % (addr1, addr2))
+                    graphx.append(row[4])
+
+                # compute plot height in inches
+                dist_mult_height, dist_mult_width = 0.55, 0.07  # these values turned out to work well
+                plt_height, plt_width = len(graphy) * dist_mult_height, max(graphx) * dist_mult_width
+                title_distance = 1 + 0.012*52.8/plt_height  # orginally, a good title distance turned out to be 1.012 with a plot height of 52.8 
+
+                # have x axis and its label appear at the top (instead of bottom)
+                fig, ax = plt.subplots()
+                ax.xaxis.tick_top()
+                ax.xaxis.set_label_position("top")
+
+                # set additional plot parameters
+                plt.title("Sent packets per connection", y=title_distance)
+                plt.xlabel('Number of Packets')
+                plt.ylabel('Connection')
+                width = 0.5
+                plt.grid(True)
+                plt.gca().margins(y=0)  # removes the space between data and x-axis within the plot 
+                plt.gcf().set_size_inches(plt_width, plt_height)  # set plot size
+
+                # plot the above data, first use plain numbers as graphy to maintain sorting
+                plt.barh(range(len(graphy)), graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
+                # now change the y numbers to the respective address labels
+                plt.yticks(range(len(graphy)), graphy)
+                # use tight layout to cut off unnecessary space
+                plt.tight_layout(pad=4)
+
+                # save created figure
+                out = self.pcap_filepath.replace('.pcap', '_plot-connection' + file_ending)
+                plt.savefig(out, dpi=500)
+                return out
+            else:
+                print("Error plot protocol: No protocol values found!")
+
+        def plot_out_degree(file_ending: str):
+            plt.gcf().clear()
+            out_degree = self.get_out_degree()
+            #print("")
+            #print("#############in plot_out_degree###########")
+            #print(out_degree)
+
+            graphx, graphy = [], []
+            for entry in out_degree:
+                graphx.append(entry[0])
+                graphy.append(entry[1])
+            plt.autoscale(enable=True, axis='both')
+            plt.title("Outdegree")
+            plt.xlabel('IpAddress')
+            plt.ylabel('Outdegree')
+            width = 0.1
+            plt.xlim([0, len(graphx)])
+            plt.grid(True)
+
+            x = range(0,len(graphx))
+            my_xticks = graphx
+            plt.xticks(x, my_xticks)
+
+            plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
+            out = self.pcap_filepath.replace('.pcap', '_out_degree' + file_ending)
+            plt.savefig(out,dpi=500)
+            return out
+
+        def plot_in_degree(file_ending: str):
+            plt.gcf().clear()
+            in_degree = self.get_in_degree()
+
+            graphx, graphy = [], []
+            for entry in in_degree:
+                graphx.append(entry[0])
+                graphy.append(entry[1])
+            plt.autoscale(enable=True, axis='both')
+            plt.title("Indegree")
+            plt.xlabel('IpAddress')
+            plt.ylabel('Indegree')
+            width = 0.1
+            plt.xlim([0, len(graphx)])
+            plt.grid(True)
+
+            x = range(0,len(graphx))
+            my_xticks = graphx
+            plt.xticks(x, my_xticks)
+
+            plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
+            out = self.pcap_filepath.replace('.pcap', '_in_degree' + file_ending)
+            plt.savefig(out,dpi=500)
+            return out
+
+
         ttl_out_path = plot_ttl('.' + format)
         mss_out_path = plot_mss('.' + format)
         win_out_path = plot_win('.' + format)
@@ -940,6 +1150,9 @@ class Statistics:
         plot_interval_new_tos = plot_interval_new_tos('.' + format)
         plot_interval_new_win_size = plot_interval_new_win_size('.' + format)
         plot_interval_new_mss = plot_interval_new_mss('.' + format)
+        plot_packets_per_connection_out = plot_packets_per_connection('.' + format)
+        plot_out_degree = plot_out_degree('.' + format)
+        plot_in_degree = plot_in_degree('.' + format)
 
         ## Time consuming plot
         # port_out_path = plot_port('.' + format)
@@ -948,3 +1161,4 @@ class Statistics:
         # ip_dst_out_path = plot_ip_dst('.' + format)
 
         print("Saved plots in the input PCAP directory.")
+        print("In-/Out-/Overall-degree plots not fully finished yet")