Bladeren bron

Refactor some plot creation code and improve its documentation

dustin.born 6 jaren geleden
bovenliggende
commit
3cfc0f8e03
1 gewijzigde bestanden met toevoegingen van 78 en 162 verwijderingen
  1. 78 162
      code/ID2TLib/Statistics.py

+ 78 - 162
code/ID2TLib/Statistics.py

@@ -1032,71 +1032,6 @@ class Statistics:
                 plt.savefig(out, dpi=500)
                 return out
 
-        def plot_packets_per_connection(file_ending: str):
-            """
-            Plots the exchanged packets per connection as horizontal bar plot. 
-            Included are 'half-open' connections, where only one packet is exchanged.
-            Note: there may be cutoff problems within the plot if there is to little data.
-
-            :param file_ending: The file extension for the output file containing the plot
-            :return: A filepath to the file containing the created plot
-            """
-            plt.gcf().clear()
-            result = self.stats_db._process_user_defined_query(
-                "SELECT ipAddressA, portA, ipAddressB, portB, pktsCount FROM conv_statistics_stateless")
-
-            if (result):
-                graphy, graphx = [], []
-                # plot data in descending order
-                result = sorted(result, key=lambda row: row[4])
-
-                # compute plot data
-                for i, row in enumerate(result):
-                    addr1, addr2 = "%s:%d" % (row[0], row[1]), "%s:%d" % (row[2], row[3])
-                    # adjust the justification of strings to improve appearance
-                    len_max = max(len(addr1), len(addr2))
-                    addr1 = addr1.ljust(len_max)
-                    addr2 = addr2.ljust(len_max)
-                    # add plot data
-                    graphy.append("%s\n%s" % (addr1, addr2))
-                    graphx.append(row[4])
-
-                # compute plot height in inches
-                dist_mult_height, dist_mult_width = 0.55, 0.07  # these values turned out to work well
-                plt_height, plt_width = len(graphy) * dist_mult_height, max(graphx) * dist_mult_width
-                title_distance = 1 + 0.012*52.8/plt_height  # orginally, a good title distance turned out to be 1.012 with a plot height of 52.8
-
-                # have x axis and its label appear at the top (instead of bottom)
-                fig, ax = plt.subplots()
-                ax.xaxis.tick_top()
-                ax.xaxis.set_label_position("top")
-
-                # set additional plot parameters
-                plt.title("Sent packets per connection", y=title_distance)
-                plt.xlabel('Number of Packets')
-                plt.ylabel('Connection')
-                width = 0.5
-                plt.grid(True)
-                plt.gca().margins(y=0)  # removes the space between data and x-axis within the plot
-                plt.gcf().set_size_inches(plt_width, plt_height)  # set plot size
-
-                # plot the above data, first use plain numbers as graphy to maintain sorting
-                plt.barh(range(len(graphy)), graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
-                # now change the y numbers to the respective address labels
-                plt.yticks(range(len(graphy)), graphy)
-                # try to use tight layout to cut off unnecessary space
-                try:
-                    plt.tight_layout(pad=4)
-                except ValueError:
-                    pass
-
-                # save created figure
-                out = self.pcap_filepath.replace('.pcap', '_plot-PktCount per Connection Distribution' + file_ending)
-                plt.savefig(out, dpi=500)
-                return out
-            else:
-                print("Error plot protocol: No protocol values found!")
-
         def plot_in_degree(file_ending: str):
             """
             Creates a Plot, visualizing the in-degree for every IP Address
@@ -1207,18 +1142,24 @@ class Statistics:
             else:
                 print("Error: No statistics Information for plotting out-degrees found")
 
-        def plot_avgpkts_per_comm_interval(file_ending: str):
+        def plot_big_comm_interval_stat(attr:str, table:str, title:str, xlabel:str, suffix:str):
             """
-            Plots the exchanged packets per connection as horizontal bar plot. 
+            Plots the desired statistic per connection as a horizontal bar plot. 
             Included are 'half-open' connections, where only one packet is exchanged.
-            Note: there may be cutoff problems within the plot if there is to little data.
-
-            :param file_ending: The file extension for the output file containing the plot
+            The given statistics table has to have at least the attributes 'ipAddressA', 'portA', 'ipAddressB',
+            'portB' and the specified additional attribute.
+            Note: there may be cutoff/scaling problems within the plot if there is too little data.
+
+            :param attr: The desired statistic, named with respect to its attribute in the given statistics table
+            :param table: The statistics table 
+            :param title: The title of the created plot
+            :param xlabel: The name of the x-axis of the created plot
+            :param suffix: The suffix of the created file, including file extension
             :return: A filepath to the file containing the created plot
             """
             plt.gcf().clear()
             result = self.stats_db._process_user_defined_query(
-                "SELECT ipAddressA, portA, ipAddressB, portB, avgPktCount FROM comm_interval_statistics")
+                "SELECT ipAddressA, portA, ipAddressB, portB, %s FROM %s" % (attr, table))
 
             if (result):
                 graphy, graphx = [], []
@@ -1236,106 +1177,81 @@ class Statistics:
                     graphy.append("%s\n%s" % (addr1, addr2))
                     graphx.append(row[4])
 
-                # compute plot height in inches
-                dist_mult_height, dist_mult_width = 0.55, 0.07  # these values turned out to work well
-                plt_height, plt_width = len(graphy) * dist_mult_height, max(graphx) * dist_mult_width
-                title_distance = 1 + 0.012*52.8/plt_height  # orginally, a good title distance turned out to be 1.012 with a plot height of 52.8
-
-                # have x axis and its label appear at the top (instead of bottom)
-                fig, ax = plt.subplots()
-                ax.xaxis.tick_top()
-                ax.xaxis.set_label_position("top")
-
-                # set additional plot parameters
-                plt.title("Average number of packets per communication interval", y=title_distance)
-                plt.xlabel('Number of Packets')
-                plt.ylabel('Connection')
-                width = 0.5
-                plt.grid(True)
-                plt.gca().margins(y=0)  # removes the space between data and x-axis within the plot
-                plt.gcf().set_size_inches(plt_width, plt_height)  # set plot size
-
-                # plot the above data, first use plain numbers as graphy to maintain sorting
-                plt.barh(range(len(graphy)), graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
-                # now change the y numbers to the respective address labels
-                plt.yticks(range(len(graphy)), graphy)
-                # try to use tight layout to cut off unnecessary space
-                try:
-                    plt.tight_layout(pad=4)
-                except ValueError:
-                    pass
+            # compute plot height in inches
+            dist_mult_height, dist_mult_width = 0.55, 0.07  # these values turned out to work well
+            plt_height, plt_width = len(graphy) * dist_mult_height, max(graphx) * dist_mult_width
+            title_distance = 1 + 0.012*52.8/plt_height  # orginally, a good title distance turned out to be 1.012 with a plot height of 52.8
 
-                # save created figure
-                out = self.pcap_filepath.replace('.pcap', '_plot-Avg PktCount Communication Interval Distribution' + file_ending)
-                plt.savefig(out, dpi=500)
-                return out
-            else:
-                print("Error plot protocol: No protocol values found!")
+            # have x axis and its label appear at the top (instead of bottom)
+            fig, ax = plt.subplots()
+            ax.xaxis.tick_top()
+            ax.xaxis.set_label_position("top")
 
-        def plot_avgtime_between_comm_interval(file_ending: str):
+            # set additional plot parameters
+            plt.title(title, y=title_distance)
+            plt.xlabel(xlabel)
+            plt.ylabel('Connection')
+            width = 0.5
+            plt.grid(True)
+            plt.gca().margins(y=0)  # removes the space between data and x-axis within the plot
+            plt.gcf().set_size_inches(plt_width, plt_height)  # set plot size
+
+            # plot the above data, first use plain numbers as graphy to maintain sorting
+            plt.barh(range(len(graphy)), graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
+            # now change the y numbers to the respective address labels
+            plt.yticks(range(len(graphy)), graphy)
+            # try to use tight layout to cut off unnecessary space
+            try:
+                plt.tight_layout(pad=4)
+            except ValueError:
+                pass
+
+            # save created figure
+            out = self.pcap_filepath.replace('.pcap', suffix)
+            plt.savefig(out, dpi=500)
+            return out
+
+        def plot_packets_per_connection(file_ending: str):
             """
-            Plots the exchanged packets per connection as horizontal bar plot. 
-            Included are 'half-open' connections, where only one packet is exchanged.
-            Note: there may be cutoff problems within the plot if there is to little data.
+            Plots the total number of exchanged packets per connection. 
 
             :param file_ending: The file extension for the output file containing the plot
             :return: A filepath to the file containing the created plot
             """
-            plt.gcf().clear()
-            result = self.stats_db._process_user_defined_query(
-                "SELECT ipAddressA, portA, ipAddressB, portB, avgTimeBetweenIntervals FROM comm_interval_statistics")
 
-            if (result):
-                graphy, graphx = [], []
-                # plot data in descending order
-                result = sorted(result, key=lambda row: row[4])
+            title = 'Number of exchanged packets per connection'
+            suffix = '_plot-PktCount per Connection Distribution' + file_ending
 
-                # compute plot data
-                for i, row in enumerate(result):
-                    addr1, addr2 = "%s:%d" % (row[0], row[1]), "%s:%d" % (row[2], row[3])
-                    # adjust the justification of strings to improve appearance
-                    len_max = max(len(addr1), len(addr2))
-                    addr1 = addr1.ljust(len_max)
-                    addr2 = addr2.ljust(len_max)
-                    # add plot data
-                    graphy.append("%s\n%s" % (addr1, addr2))
-                    graphx.append(row[4])
+            # plot data and return outpath
+            return plot_big_comm_interval_stat("pktsCount", "conv_statistics_stateless", title, "Number of packets", suffix)
 
-                # compute plot height in inches
-                dist_mult_height, dist_mult_width = 0.55, 0.07  # these values turned out to work well
-                plt_height, plt_width = len(graphy) * dist_mult_height, max(graphx) * dist_mult_width
-                title_distance = 1 + 0.012*52.8/plt_height  # orginally, a good title distance turned out to be 1.012 with a plot height of 52.8
-
-                # have x axis and its label appear at the top (instead of bottom)
-                fig, ax = plt.subplots()
-                ax.xaxis.tick_top()
-                ax.xaxis.set_label_position("top")
-
-                # set additional plot parameters
-                plt.title("Average time between communication intervals in seconds", y=title_distance)
-                plt.xlabel('Average time between intervals')
-                plt.ylabel('Connection')
-                width = 0.5
-                plt.grid(True)
-                plt.gca().margins(y=0)  # removes the space between data and x-axis within the plot
-                plt.gcf().set_size_inches(plt_width, plt_height)  # set plot size
-
-                # plot the above data, first use plain numbers as graphy to maintain sorting
-                plt.barh(range(len(graphy)), graphx, width, align='center', linewidth=1, color='red', edgecolor='red')
-                # now change the y numbers to the respective address labels
-                plt.yticks(range(len(graphy)), graphy)
-                # try to use tight layout to cut off unnecessary space
-                try:
-                    plt.tight_layout(pad=4)
-                except ValueError:
-                    pass
+        def plot_avg_pkts_per_comm_interval(file_ending: str):
+            """
+            Plots the average number of exchanged packets per communication interval for every connection. 
 
-                # save created figure
-                out = self.pcap_filepath.replace('.pcap', '_plot-Avg Time Between Communication Intervals Distribution' + file_ending)
-                plt.savefig(out, dpi=500)
-                return out
-            else:
-                print("Error plot protocol: No protocol values found!")
+            :param file_ending: The file extension for the output file containing the plot
+            :return: A filepath to the file containing the created plot
+            """
+
+            title = 'Average number of exchanged packets per communication interval'
+            suffix = '_plot-Avg PktCount Communication Interval Distribution' + file_ending
+
+            # plot data and return outpath
+            return plot_big_comm_interval_stat("avgPktCount", "comm_interval_statistics" ,title, "Number of packets", suffix)
+
+        def plot_avg_time_between_comm_interval(file_ending: str):
+            """
+            Plots the average time between the communication intervals of every connection. 
+
+            :param file_ending: The file extension for the output file containing the plot
+            :return: A filepath to the file containing the created plot
+            """
+
+            title = 'Average time between communication intervals in seconds'
+            suffix = '_plot-Avg Time Between Communication Intervals Distribution' + file_ending
+
+            # plot data and return outpath
+            return plot_big_comm_interval_stat("avgTimeBetweenIntervals", "comm_interval_statistics", title, 'Average time between intervals', suffix)
 
 
         ttl_out_path = plot_ttl('.' + format)
@@ -1356,8 +1272,8 @@ class Statistics:
         plot_packets_per_connection_out = plot_packets_per_connection('.' + format)
         plot_out_degree = plot_out_degree('.' + format)
         plot_in_degree = plot_in_degree('.' + format)
-        plot_avgpkts_per_comm_interval_out = plot_avgpkts_per_comm_interval('.' + format)
-        plot_avgtime_between_comm_interval_out = plot_avgtime_between_comm_interval('.' + format)
+        plot_avg_pkts_per_comm_interval_out = plot_avg_pkts_per_comm_interval('.' + format)
+        plot_avg_time_between_comm_interval_out = plot_avg_time_between_comm_interval('.' + format)
 
         ## Time consuming plot
         # port_out_path = plot_port('.' + format)