add user-specified interval length

add pkt_rate and kbyte_rate to interval statistics table
add multiple interval_statistics tables support
add interval_tables table
add function to recalculate previous interval statistics tables
increase DB version

fix load_pcap_statistics calls in tests
add process_interval_statistics_query to fix internal queries regarding interval statistics

add support for multiple user-specified interval lengths
add --recalculate-delete cli argument
add --recalculate-yes and --recalculate-no parameters
Jens Keim 5 years ago
parent
commit
869fbe66f4

+ 1 - 1
build.sh

@@ -122,7 +122,7 @@ SCRIPT_PATH=${ID2T_DIR%/*}
 cd $SCRIPT_PATH
 source .venv/bin/activate
 # Regenerate the statistics DB
-./id2t -i resources/test/reference_1998.pcap -r >/dev/null
+./id2t -i resources/test/reference_1998.pcap -rd >/dev/null
 cd code
 # Execute tests
 set -e

+ 29 - 1
code/CLI.py

@@ -74,6 +74,23 @@ class CLI(object):
         parser.add_argument('-ie', '--inject_empty', action='store_true',
                             help='injects ATTACK into an EMPTY PCAP file, using the statistics of the input PCAP.')
         parser.add_argument('-d', '--debug', help='Runs ID2T in debug mode.', action='store_true', default=False)
+        parser.add_argument('-si', '--statistics_interval', help='interval duration(s) in seconds', action='store',
+                            type=float, nargs='+', default=[])
+        parser.add_argument('-rd', '--recalculate-delete',
+                            help='recalculate statistics even if a cached version exists; '
+                                 'also deletes old interval statistics and '
+                                 'suppresses the (yes, no, delete) prompt.', action='store_true',
+                            default=False)
+        parser.add_argument('-ry', '--recalculate-yes',
+                            help='recalculate statistics even if a cached version exists; '
+                                 'also recalculates old interval statistics and '
+                                 'suppresses the (yes, no, delete) prompt.', action='store_true',
+                            default=False)
+        parser.add_argument('-rn', '--recalculate-no',
+                            help='recalculate statistics even if a cached version exists; '
+                                 'keeps old interval statistics without recalculating them and '
+                                 'suppresses the (yes, no, delete) prompt.', action='store_true',
+                            default=False)
 
         # Attack arguments
         parser.add_argument('-a', '--attack', metavar="ATTACK", action='append',
@@ -149,7 +166,18 @@ class CLI(object):
                                 self.args.debug)
 
         # Load PCAP statistics
-        controller.load_pcap_statistics(self.args.export, self.args.recalculate, self.args.statistics)
+        recalculate_intervals = None
+        if self.args.recalculate_delete:
+            self.args.recalculate = True
+        elif self.args.recalculate_yes:
+            recalculate_intervals = True
+            self.args.recalculate = True
+        elif self.args.recalculate_no:
+            recalculate_intervals = False
+            self.args.recalculate = True
+        controller.load_pcap_statistics(self.args.export, self.args.recalculate, self.args.statistics,
+                                        self.args.statistics_interval, self.args.recalculate_delete,
+                                        recalculate_intervals)
 
         # Create statistics plots
         if self.args.plot is not None:
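
The three recalculation switches short-circuit the (yes, no, delete) prompt introduced in Statistics.py below. A minimal sketch of the parameters they translate into, with illustrative interval lengths (the flag values here are hypothetical, not part of the commit):

    # -rd: recalculate, drop old interval tables, suppress the prompt
    # -ry: recalculate, also recalculate old interval tables
    # -rn: recalculate, keep old interval tables untouched
    controller.load_pcap_statistics(flag_write_file=False,
                                    flag_recalculate_stats=True,   # implied by -rd/-ry/-rn
                                    flag_print_statistics=False,
                                    intervals=[1.0, 5.0],          # e.g. from: -si 1.0 5.0
                                    delete=False,                  # True for -rd
                                    recalculate_intervals=True)    # True for -ry, False for -rn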

+ 7 - 2
code/Core/Controller.py

@@ -54,7 +54,8 @@ class Controller:
             Util.OUT_DIR = os.path.join(os.path.dirname(pcap_file_path), "ID2T_results") + os.sep
         os.makedirs(Util.OUT_DIR, exist_ok=True)
 
-    def load_pcap_statistics(self, flag_write_file: bool, flag_recalculate_stats: bool, flag_print_statistics: bool):
+    def load_pcap_statistics(self, flag_write_file: bool, flag_recalculate_stats: bool, flag_print_statistics: bool,
+                             intervals, delete: bool=False, recalculate_intervals: bool=None):
         """
         Loads the PCAP statistics either from the database, if the statistics were calculated earlier, or calculates
         the statistics and creates a new database.
@@ -62,10 +63,14 @@ class Controller:
         :param flag_write_file: Writes the statistics to a file.
         :param flag_recalculate_stats: Forces the recalculation of statistics.
         :param flag_print_statistics: Prints the statistics on the terminal.
+        :param intervals: user-specified interval length(s) in seconds
+        :param delete: Delete old interval statistics.
+        :param recalculate_intervals: Recalculate old interval statistics or not. Prompt the user if None.
         :return: None
         """
         self.statistics.load_pcap_statistics(flag_write_file, flag_recalculate_stats, flag_print_statistics,
-                                             self.non_verbose)
+                                             self.non_verbose, intervals=intervals, delete=delete,
+                                             recalculate_intervals=recalculate_intervals)
 
     def process_attacks(self, attacks_config: list, seeds=None, time: bool=False, inject_empty: bool=False):
         """

+ 69 - 36
code/Core/Statistics.py

@@ -38,7 +38,7 @@ class Statistics:
         self.stats_db = statsDB.StatsDatabase(self.path_db)
 
     def load_pcap_statistics(self, flag_write_file: bool, flag_recalculate_stats: bool, flag_print_statistics: bool,
-                             flag_non_verbose: bool):
+                             flag_non_verbose: bool, intervals, delete: bool=False, recalculate_intervals: bool=None):
         """
         Loads the PCAP statistics for the file specified by pcap_filepath. If the database is not existing yet, the
         statistics are calculated by the PCAP file processor and saved into the newly created database. Otherwise the
@@ -49,6 +49,9 @@ class Statistics:
         :param flag_recalculate_stats: Indicates whether eventually existing statistics should be recalculated
         :param flag_print_statistics: Indicates whether the gathered basic statistics should be printed to the terminal
         :param flag_non_verbose: Indicates whether certain prints should be made or not, to reduce terminal clutter
+        :param intervals: user-specified interval length(s) in seconds
+        :param delete: Delete old interval statistics.
+        :param recalculate_intervals: Recalculate old interval statistics or not. Prompt the user if None.
         """
         # Load pcap and get loading time
         time_start = time.clock()
@@ -60,8 +63,39 @@ class Statistics:
         # Recalculate statistics if database does not exist OR param -r/--recalculate is provided
         if (not self.stats_db.get_db_exists()) or flag_recalculate_stats or self.stats_db.get_db_outdated():
             self.pcap_proc = pr.pcap_processor(self.pcap_filepath, str(self.do_extra_tests), Util.RESOURCE_DIR)
-            self.pcap_proc.collect_statistics()
-            self.pcap_proc.write_to_database(self.path_db)
+            previous_interval_tables = self.stats_db.process_db_query("SELECT name FROM sqlite_master WHERE "
+                                                                      "type='table' AND name LIKE "
+                                                                      "'interval_statistics_%';")
+            previous_intervals = []
+            recalc_intervals = None
+            if previous_interval_tables:
+                if not isinstance(previous_interval_tables, list):
+                    previous_interval_tables = [previous_interval_tables]
+                print("There are " + str(len(previous_interval_tables)) + " interval statistics table(s) in the "
+                                                                          "database:")
+                for table in previous_interval_tables:
+                    print(table)
+                    previous_intervals.append(float(table[len("interval_statistics_"):])/1000000)
+                recalc_intervals = recalculate_intervals
+                while recalc_intervals is None and not delete:
+                    user_input = input("Do you want to recalculate them as well? (yes|no|delete): ")
+                    if user_input.lower() == "yes" or user_input.lower() == "y":
+                        recalc_intervals = True
+                    elif user_input.lower() == "no" or user_input.lower() == "n":
+                        recalc_intervals = False
+                    elif user_input.lower() == "delete" or user_input.lower() == "d":
+                        recalc_intervals = False
+                        delete = True
+                    else:
+                        print("That is not valid input.")
+            if intervals is None or intervals == []:
+                intervals = [0.0]
+            elif not isinstance(intervals, list):
+                intervals = [intervals]
+            if recalc_intervals and previous_intervals:
+                intervals = list(set(intervals + previous_intervals))
+            self.pcap_proc.collect_statistics(intervals)
+            self.pcap_proc.write_to_database(self.path_db, intervals, delete)
             outstring_datasource = "by PCAP file processor."
 
             # only print summary of new db if -s flag not set
@@ -189,8 +223,8 @@ class Statistics:
 
         :return: normalized packet rates for each time interval.
         """
-        result = self.process_db_query(
-            "SELECT lastPktTimestamp,pktsCount FROM interval_statistics ORDER BY lastPktTimestamp")
+        result = self.stats_db.process_interval_statistics_query(
+            "SELECT lastPktTimestamp,pktsCount FROM %s ORDER BY lastPktTimestamp")
         # print(result)
         bg_interval_pps = []
         complement_interval_pps = []
@@ -235,8 +269,7 @@ class Statistics:
             return values, freq_output
 
         # Payload Tests
-        sum_payload_count = self.stats_db.process_user_defined_query("SELECT sum(payloadCount) FROM "
-                                                                     "interval_statistics")
+        sum_payload_count = self.stats_db.process_interval_statistics_query("SELECT sum(payloadCount) FROM %s")
         pkt_count = self.stats_db.process_user_defined_query("SELECT packetCount FROM file_statistics")
         if sum_payload_count and pkt_count:
             payload_ratio = 0
@@ -246,10 +279,10 @@ class Statistics:
             payload_ratio = -1
 
         # TCP checksum Tests
-        incorrect_checksum_count = self.stats_db.process_user_defined_query(
-            "SELECT sum(incorrectTCPChecksumCount) FROM interval_statistics")
-        correct_checksum_count = self.stats_db.process_user_defined_query(
-            "SELECT avg(correctTCPChecksumCount) FROM interval_statistics")
+        incorrect_checksum_count = self.stats_db.process_interval_statistics_query(
+            "SELECT sum(incorrectTCPChecksumCount) FROM %s")
+        correct_checksum_count = self.stats_db.process_interval_statistics_query(
+            "SELECT avg(correctTCPChecksumCount) FROM %s")
         if incorrect_checksum_count and correct_checksum_count:
             incorrect_checksum_ratio = 0
             if (incorrect_checksum_count[0][0] + correct_checksum_count[0][0]) != 0:
@@ -268,7 +301,7 @@ class Statistics:
         ip_src_entropy, ip_src_norm_entropy = self.calculate_entropy(src_frequency, True)
         ip_dst_entropy, ip_dst_norm_entropy = self.calculate_entropy(dst_frequency, True)
 
-        new_ip_count = self.stats_db.process_user_defined_query("SELECT newIPCount FROM interval_statistics")
+        new_ip_count = self.stats_db.process_interval_statistics_query("SELECT newIPCount FROM %s")
         ip_novels_per_interval, ip_novels_per_interval_frequency = count_frequncy(new_ip_count)
         ip_novelty_dist_entropy = self.calculate_entropy(ip_novels_per_interval_frequency)
 
@@ -294,7 +327,7 @@ class Statistics:
         for row in result:
             frequency.append(row[1])
         ttl_entropy, ttl_norm_entropy = self.calculate_entropy(frequency, True)
-        new_ttl_count = self.stats_db.process_user_defined_query("SELECT newTTLCount FROM interval_statistics")
+        new_ttl_count = self.stats_db.process_interval_statistics_query("SELECT newTTLCount FROM %s")
         ttl_novels_per_interval, ttl_novels_per_interval_frequency = count_frequncy(new_ttl_count)
         ttl_novelty_dist_entropy = self.calculate_entropy(ttl_novels_per_interval_frequency)
 
@@ -304,7 +337,7 @@ class Statistics:
         for row in result:
             frequency.append(row[1])
         win_entropy, win_norm_entropy = self.calculate_entropy(frequency, True)
-        new_win_size_count = self.stats_db.process_user_defined_query("SELECT newWinSizeCount FROM interval_statistics")
+        new_win_size_count = self.stats_db.process_interval_statistics_query("SELECT newWinSizeCount FROM %s")
         win_novels_per_interval, win_novels_per_interval_frequency = count_frequncy(new_win_size_count)
         win_novelty_dist_entropy = self.calculate_entropy(win_novels_per_interval_frequency)
 
@@ -315,7 +348,7 @@ class Statistics:
         for row in result:
             frequency.append(row[1])
         tos_entropy, tos_norm_entropy = self.calculate_entropy(frequency, True)
-        new_tos_count = self.stats_db.process_user_defined_query("SELECT newToSCount FROM interval_statistics")
+        new_tos_count = self.stats_db.process_interval_statistics_query("SELECT newToSCount FROM %s")
         tos_novels_per_interval, tos_novels_per_interval_frequency = count_frequncy(new_tos_count)
         tos_novelty_dist_entropy = self.calculate_entropy(tos_novels_per_interval_frequency)
 
@@ -326,7 +359,7 @@ class Statistics:
         for row in result:
             frequency.append(row[1])
         mss_entropy, mss_norm_entropy = self.calculate_entropy(frequency, True)
-        new_mss_count = self.stats_db.process_user_defined_query("SELECT newMSSCount FROM interval_statistics")
+        new_mss_count = self.stats_db.process_interval_statistics_query("SELECT newMSSCount FROM %s")
         mss_novels_per_interval, mss_novels_per_interval_frequency = count_frequncy(new_mss_count)
         mss_novelty_dist_entropy = self.calculate_entropy(mss_novels_per_interval_frequency)
 
@@ -1077,8 +1110,8 @@ class Statistics:
             :param file_ending:
             :return:
             """
-            query_output = self.stats_db.process_user_defined_query(
-                "SELECT lastPktTimestamp, pktsCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            query_output = self.stats_db.process_interval_statistics_query(
+                "SELECT lastPktTimestamp, pktsCount FROM %s ORDER BY lastPktTimestamp")
             title = "Packet Rate"
             x_label = "Time Interval"
             y_label = "Number of Packets"
@@ -1091,8 +1124,8 @@ class Statistics:
             :param file_ending:
             :return:
             """
-            query_output = self.stats_db.process_user_defined_query(
-                "SELECT lastPktTimestamp, ipSrcEntropy FROM interval_statistics ORDER BY lastPktTimestamp")
+            query_output = self.stats_db.process_interval_statistics_query(
+                "SELECT lastPktTimestamp, ipSrcEntropy FROM %s ORDER BY lastPktTimestamp")
             title = "Source IP Entropy"
             x_label = "Time Interval"
             y_label = "Entropy"
@@ -1105,8 +1138,8 @@ class Statistics:
             :param file_ending:
             :return:
             """
-            query_output = self.stats_db.process_user_defined_query(
-                "SELECT lastPktTimestamp, ipDstEntropy FROM interval_statistics ORDER BY lastPktTimestamp")
+            query_output = self.stats_db.process_interval_statistics_query(
+                "SELECT lastPktTimestamp, ipDstEntropy FROM %s ORDER BY lastPktTimestamp")
             title = "Destination IP Entropy"
             x_label = "Time Interval"
             y_label = "Entropy"
@@ -1119,8 +1152,8 @@ class Statistics:
             :param file_ending:
             :return:
             """
-            query_output = self.stats_db.process_user_defined_query(
-                "SELECT lastPktTimestamp, newIPCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            query_output = self.stats_db.process_interval_statistics_query(
+                "SELECT lastPktTimestamp, newIPCount FROM %s ORDER BY lastPktTimestamp")
             title = "IP Novelty Distribution"
             x_label = "Time Interval"
             y_label = "Novel values count"
@@ -1133,8 +1166,8 @@ class Statistics:
             :param file_ending:
             :return:
             """
-            query_output = self.stats_db.process_user_defined_query(
-                "SELECT lastPktTimestamp, newPortCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            query_output = self.stats_db.process_interval_statistics_query(
+                "SELECT lastPktTimestamp, newPortCount FROM %s ORDER BY lastPktTimestamp")
             title = "Port Novelty Distribution"
             x_label = "Time Interval"
             y_label = "Novel values count"
@@ -1147,8 +1180,8 @@ class Statistics:
             :param file_ending:
             :return:
             """
-            query_output = self.stats_db.process_user_defined_query(
-                "SELECT lastPktTimestamp, newTTLCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            query_output = self.stats_db.process_interval_statistics_query(
+                "SELECT lastPktTimestamp, newTTLCount FROM %s ORDER BY lastPktTimestamp")
             title = "TTL Novelty Distribution"
             x_label = "Time Interval"
             y_label = "Novel values count"
@@ -1161,8 +1194,8 @@ class Statistics:
             :param file_ending:
             :return:
             """
-            query_output = self.stats_db.process_user_defined_query(
-                "SELECT lastPktTimestamp, newToSCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            query_output = self.stats_db.process_interval_statistics_query(
+                "SELECT lastPktTimestamp, newToSCount FROM %s ORDER BY lastPktTimestamp")
             title = "ToS Novelty Distribution"
             x_label = "Time Interval"
             y_label = "Novel values count"
@@ -1175,8 +1208,8 @@ class Statistics:
             :param file_ending:
             :return:
             """
-            query_output = self.stats_db.process_user_defined_query(
-                "SELECT lastPktTimestamp, newWinSizeCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            query_output = self.stats_db.process_interval_statistics_query(
+                "SELECT lastPktTimestamp, newWinSizeCount FROM %s ORDER BY lastPktTimestamp")
             title = "Window Size Novelty Distribution"
             x_label = "Time Interval"
             y_label = "Novel values count"
@@ -1189,8 +1222,8 @@ class Statistics:
             :param file_ending:
             :return:
             """
-            query_output = self.stats_db.process_user_defined_query(
-                "SELECT lastPktTimestamp, newMSSCount FROM interval_statistics ORDER BY lastPktTimestamp")
+            query_output = self.stats_db.process_interval_statistics_query(
+                "SELECT lastPktTimestamp, newMSSCount FROM %s ORDER BY lastPktTimestamp")
             title = "MSS Novelty Distribution"
             x_label = "Time Interval"
             y_label = "Novel values count"
@@ -1214,8 +1247,8 @@ class Statistics:
                 return None
 
             plt.gcf().clear()
-            result = self.stats_db.process_user_defined_query(
-                "SELECT lastPktTimestamp, ip%sCumEntropy FROM interval_statistics ORDER BY lastPktTimestamp" % sod)
+            result = self.stats_db.process_interval_statistics_query(
+                "SELECT lastPktTimestamp, ip%sCumEntropy FROM %%s ORDER BY lastPktTimestamp" % sod)
             graphx, graphy = [], []
             for row in result:
                 graphx.append(row[0])

+ 11 - 0
code/Core/StatsDatabase.py

@@ -365,6 +365,17 @@ class StatsDatabase:
 
         return result
 
+    def process_interval_statistics_query(self, query_string_in: str):
+        """
+        Runs the given query on the default interval statistics table.
+
+        :param query_string_in: a query containing a %s placeholder for the interval table name
+        :return: the query results
+        """
+        table_name = self.process_db_query("SELECT name FROM interval_tables WHERE is_default=1")
+        return self.process_user_defined_query(query_string_in % table_name)
+
     def _print_query_results(self, query_string_in: str, result: typing.List[typing.Union[str, float, int]]) -> None:
         """
         Prints the results of a query.
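
A hypothetical call, for illustration: the %s placeholder is substituted with the name of the table that interval_tables marks as default (is_default = 1):

    rows = stats_db.process_interval_statistics_query(
        "SELECT lastPktTimestamp, pktsCount FROM %s ORDER BY lastPktTimestamp")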

+ 6 - 3
code/Test/ID2TAttackTest.py

@@ -32,7 +32,8 @@ class ID2TAttackTest(unittest.TestCase):
         """
 
         controller = Ctrl.Controller(pcap_file_path=pcap, do_extra_tests=False, non_verbose=True)
-        controller.load_pcap_statistics(flag_write_file, flag_recalculate_stats, flag_print_statistics)
+        controller.load_pcap_statistics(flag_write_file, flag_recalculate_stats, flag_print_statistics,
+                                        intervals=[], delete=True)
 
         controller.process_attacks(attack_args, [[seed]], time)
 
@@ -69,7 +70,8 @@ class ID2TAttackTest(unittest.TestCase):
         """
 
         controller = Ctrl.Controller(pcap_file_path=pcap, do_extra_tests=False, non_verbose=True)
-        controller.load_pcap_statistics(flag_write_file, flag_recalculate_stats, flag_print_statistics)
+        controller.load_pcap_statistics(flag_write_file, flag_recalculate_stats, flag_print_statistics,
+                                        intervals=[], delete=True)
 
         if seed is None:
             controller.process_attacks(attack_args, time=True)
@@ -110,7 +112,8 @@ class ID2TAttackTest(unittest.TestCase):
         """
 
         controller = Ctrl.Controller(pcap_file_path=pcap, do_extra_tests=False, non_verbose=True)
-        controller.load_pcap_statistics(flag_write_file, flag_recalculate_stats, flag_print_statistics)
+        controller.load_pcap_statistics(flag_write_file, flag_recalculate_stats, flag_print_statistics,
+                                        intervals=[], delete=True)
         controller.process_attacks(attack_args, [[seed]])
 
         caller_function = inspect.stack()[1].function

+ 2 - 1
code/Test/test_NamedQueries.py

@@ -5,7 +5,8 @@ import ID2TLib.TestLibrary as Lib
 import Core.Controller as Ctrl
 
 controller = Ctrl.Controller(pcap_file_path=Lib.test_pcap, do_extra_tests=False, non_verbose=True)
-controller.load_pcap_statistics(flag_write_file=False, flag_recalculate_stats=True, flag_print_statistics=False)
+controller.load_pcap_statistics(flag_write_file=False, flag_recalculate_stats=True, flag_print_statistics=False,
+                                intervals=[], delete=True)
 
 ip_addresses = ["10.0.2.15",      "104.83.103.45",  "13.107.21.200",  "131.253.61.100", "172.217.23.142",
                 "172.217.23.174", "192.168.33.254", "204.79.197.200", "23.51.123.27",   "35.161.3.50",

+ 2 - 1
code/Test/test_NestedNamedQueries.py

@@ -6,7 +6,8 @@ import ID2TLib.TestLibrary as Lib
 import Core.Controller as Ctrl
 
 controller = Ctrl.Controller(pcap_file_path=Lib.test_pcap, do_extra_tests=False, non_verbose=True)
-controller.load_pcap_statistics(flag_write_file=False, flag_recalculate_stats=True, flag_print_statistics=False)
+controller.load_pcap_statistics(flag_write_file=False, flag_recalculate_stats=True, flag_print_statistics=False,
+                                intervals=[], delete=True)
 
 
 class UnitTestNestedNamedQueries(unittest.TestCase):

+ 2 - 1
code/Test/test_SQLQueries.py

@@ -5,7 +5,8 @@ import ID2TLib.TestLibrary as Lib
 import Core.Controller as Ctrl
 
 controller = Ctrl.Controller(pcap_file_path=Lib.test_pcap, do_extra_tests=False, non_verbose=True)
-controller.load_pcap_statistics(flag_write_file=False, flag_recalculate_stats=True, flag_print_statistics=False)
+controller.load_pcap_statistics(flag_write_file=False, flag_recalculate_stats=True, flag_print_statistics=False,
+                                intervals=[], delete=True)
 
 
 class UnitTestSqlQueries(unittest.TestCase):

+ 2 - 1
code/Test/test_internalQueries.py

@@ -5,7 +5,8 @@ import ID2TLib.TestLibrary as Lib
 import Core.Controller as Ctrl
 
 controller = Ctrl.Controller(pcap_file_path=Lib.test_pcap, do_extra_tests=False, non_verbose=True)
-controller.load_pcap_statistics(flag_write_file=False, flag_recalculate_stats=True, flag_print_statistics=False)
+controller.load_pcap_statistics(flag_write_file=False, flag_recalculate_stats=True, flag_print_statistics=False,
+                                intervals=[], delete=True)
 
 ipAddresses = ['10.0.2.15', '104.83.103.45', '13.107.21.200', '131.253.61.100', '172.217.23.142', '172.217.23.174',
                '192.168.33.254', '204.79.197.200', '23.51.123.27', '35.161.3.50', '52.11.17.245', '52.34.37.177',

+ 1 - 1
code_boost/src/cxx/botnet_comm_processor.cpp

@@ -530,7 +530,7 @@ int botnet_comm_processor::msgtype_is_response(unsigned short mtype){
  * Converts the given vector of communication intervals to a python representation 
  * using (python) lists and (python) tuples.
  * @param intervals The communication intervals to convert.
- * @return A boost::python::list containing the same interval information using boost::python::dict for each interval.
+ * @return A py::list containing the same interval information, using a py::dict for each interval.
  */
 py::list botnet_comm_processor::convert_intervals_to_py_repr(const std::vector<comm_interval> &intervals){
     py::list py_intervals;

+ 55 - 19
code_boost/src/cxx/pcap_processor.cpp

@@ -155,8 +155,9 @@ bool pcap_processor::read_pcap_info(const std::string &filePath, std::size_t &to
 
 /**
  * Collect statistics of the loaded PCAP file. Calls for each packet the method process_packets.
+ * @param intervals The user-specified interval lengths in seconds.
  */
-void pcap_processor::collect_statistics() {
+void pcap_processor::collect_statistics(const py::list& intervals) {
     // Only process PCAP if file exists
     if (file_exists(filePath)) {
         std::cout << "Loading pcap..." << std::endl;
@@ -171,34 +172,63 @@ void pcap_processor::collect_statistics() {
 
         // choose a suitable time interval
         int timeIntervalCounter = 1;
-        int timeIntervalsNum = 100;
-        std::chrono::microseconds intervalStartTimestamp = stats.getTimestampFirstPacket();
+        long timeInterval_microsec = 0;
+        std::vector<std::chrono::microseconds> intervalStartTimestamp;
         std::chrono::microseconds firstTimestamp = stats.getTimestampFirstPacket();
-        std::chrono::microseconds lastTimestamp = stats.getTimestampLastPacket();
-        std::chrono::microseconds captureDuration = lastTimestamp - firstTimestamp;
-        if(captureDuration.count()<=0){
-            std::cerr << "ERROR: PCAP file is empty!" << std::endl;
-            return;
+
+        std::vector<std::chrono::duration<int, std::micro>> timeIntervals;
+        std::vector<std::chrono::microseconds> barriers;
+
+        if (intervals.size() == 0) {
+            int timeIntervalsNum = 100;
+            std::chrono::microseconds lastTimestamp = stats.getTimestampLastPacket();
+            std::chrono::microseconds captureDuration = lastTimestamp - firstTimestamp;
+            if(captureDuration.count()<=0){
+                std::cerr << "ERROR: PCAP file is empty!" << std::endl;
+                return;
+            }
+            timeInterval_microsec = captureDuration.count() / timeIntervalsNum;
+            stats.setDefaultInterval(static_cast<double>(timeInterval_microsec));
+            intervalStartTimestamp.push_back(firstTimestamp);
+            std::chrono::duration<int, std::micro> timeInterval(timeInterval_microsec);
+            std::chrono::microseconds barrier = timeInterval;
+            timeIntervals.push_back(timeInterval);
+            barriers.push_back(barrier);
+        } else {
+            for (auto interval: intervals) {
+                double interval_double = interval.cast<double>();
+                timeInterval_microsec = static_cast<long>(interval_double * 1000000);
+                intervalStartTimestamp.push_back(firstTimestamp);
+                std::chrono::duration<int, std::micro> timeInterval(timeInterval_microsec);
+                std::chrono::microseconds barrier = timeInterval;
+                timeIntervals.push_back(timeInterval);
+                barriers.push_back(barrier);
+            }
         }
-        long timeInterval_microsec = captureDuration.count() / timeIntervalsNum;
-        std::chrono::duration<int, std::micro> timeInterval(timeInterval_microsec);
-        std::chrono::microseconds barrier = timeInterval;
+
+        std::sort(timeIntervals.begin(), timeIntervals.end());
+        std::sort(barriers.begin(), barriers.end());
 
         std::cout << std::endl;
         std::chrono::system_clock::time_point lastPrinted = std::chrono::system_clock::now();
 
+        int barrier_count = static_cast<int>(barriers.size());
+
         // Iterate over all packets and collect statistics
         for (; i != sniffer.end(); i++) {
             currentPktTimestamp = i->timestamp();
             std::chrono::microseconds currentDuration = currentPktTimestamp - firstTimestamp;
 
             // For each interval
-            if(currentDuration>barrier){
-                stats.addIntervalStat(timeInterval, intervalStartTimestamp, currentPktTimestamp);
-                timeIntervalCounter++;
-
-                barrier =  barrier + timeInterval;
-                intervalStartTimestamp = currentPktTimestamp;
+            // note: a trailing partial interval shorter than the interval length is dropped
+            for (int j = 0; j < barrier_count; j++) {
+                if(currentDuration>barriers[j]){
+                    stats.addIntervalStat(timeIntervals[j], intervalStartTimestamp[j], currentPktTimestamp);
+                    timeIntervalCounter++;
+
+                    barriers[j] =  barriers[j] + timeIntervals[j];
+                    intervalStartTimestamp[j] = currentPktTimestamp;
+                }
             }
 
             stats.incrementPacketCount();
@@ -374,8 +404,14 @@ void pcap_processor::process_packets(const Packet &pkt) {
  * database or, if not present, creates a new database.
  * @param database_path The path to the database file, ending with .sqlite3.
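+ * @param intervals The user-specified interval lengths in seconds.
+ * @param del Whether previously created interval statistics tables should be dropped.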
  */
-void pcap_processor::write_to_database(std::string database_path) {
-    stats.writeToDatabase(database_path);
+void pcap_processor::write_to_database(std::string database_path, const py::list& intervals, bool del) {
+    std::vector<std::chrono::duration<int, std::micro>> timeIntervals;
+    for (auto interval: intervals) {
+        double interval_double = interval.cast<double>();
+        std::chrono::duration<int, std::micro> timeInterval(static_cast<long>(interval_double * 1000000));
+        timeIntervals.push_back(timeInterval);
+    }
+    stats.writeToDatabase(database_path, timeIntervals, del);
 }
 
 /**
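
The per-interval barrier logic above generalizes the old single-interval loop; a Python sketch of the same idea, assuming packet offsets in microseconds relative to the first packet (all names here are illustrative):

    intervals = sorted([500000, 2000000])  # interval lengths in microseconds
    barriers = list(intervals)             # next boundary for each interval length
    for offset in packet_offsets:          # hypothetical stream of packet offsets
        for j, interval in enumerate(intervals):
            if offset > barriers[j]:
                add_interval_stat(j)       # stands in for stats.addIntervalStat(...)
                barriers[j] += interval    # advance only this interval's boundary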

+ 7 - 2
code_boost/src/cxx/pcap_processor.h

@@ -5,9 +5,11 @@
 #ifndef CPP_PCAPREADER_MAIN_H
 #define CPP_PCAPREADER_MAIN_H
 
+#include <algorithm>
 #include <iomanip>
 #include <tins/tins.h>
 #include <iostream>
+#include <pybind11/pybind11.h>
 #include <time.h>
 #include <stdio.h>
 #include <sys/stat.h>
@@ -15,6 +17,8 @@
 #include "statistics.h"
 #include "statistics_db.h"
 
+namespace py = pybind11;
+
 using namespace Tins;
 
 class pcap_processor {
@@ -31,6 +35,7 @@ public:
     statistics stats;
     std::string filePath;
     bool hasUnrecognized;
+    std::chrono::duration<int, std::micro> timeInterval;
 
     /*
      * Methods
@@ -45,9 +50,9 @@ public:
 
     bool read_pcap_info(const std::string &filePath, std::size_t &totalPakets);
 
-    void collect_statistics();
+    void collect_statistics(const py::list& intervals);
 
-    void write_to_database(std::string database_path);
+    void write_to_database(std::string database_path, const py::list& intervals, bool del);
 
     static int get_db_version() { return statistics_db::DB_VERSION; };
 };

+ 17 - 4
code_boost/src/cxx/statistics.cpp

@@ -180,8 +180,13 @@ void statistics::addIntervalStat(std::chrono::duration<int, std::micro> interval
     std::string  intervalStartTimestamp_s = std::to_string(intervalStartTimestamp.count());
 
     // The intervalStartTimestamp_s is the previous interval lastPktTimestamp_s
+    // TODO: check with Carlos if first and last packet timestamps are alright
+    interval_statistics[lastPktTimestamp_s].start = std::to_string(intervalStartTimestamp.count());
+    interval_statistics[lastPktTimestamp_s].end = std::to_string(intervalEndTimestamp.count());
     interval_statistics[lastPktTimestamp_s].pkts_count = packetCount - intervalCumPktCount;
-    interval_statistics[lastPktTimestamp_s].kbytes = (float(sumPacketSize - intervalCumSumPktSize) / 1024);
+    double interval_seconds = static_cast<double>(interval.count()) / 1000000;
+    interval_statistics[lastPktTimestamp_s].pkt_rate =
+            static_cast<float>(interval_statistics[lastPktTimestamp_s].pkts_count) / interval_seconds;
+    interval_statistics[lastPktTimestamp_s].kbytes = static_cast<float>(sumPacketSize - intervalCumSumPktSize) / 1024;
+    interval_statistics[lastPktTimestamp_s].kbyte_rate = interval_statistics[lastPktTimestamp_s].kbytes / interval_seconds;
 
     interval_statistics[lastPktTimestamp_s].payload_count = payloadCount - intervalPayloadCount;
     interval_statistics[lastPktTimestamp_s].incorrect_tcp_checksum_count = incorrectTCPChecksumCount - intervalIncorrectTCPChecksumCount;
@@ -214,7 +219,7 @@ void statistics::addIntervalStat(std::chrono::duration<int, std::micro> interval
         interval_statistics[lastPktTimestamp_s].ip_src_cum_entropy = ipCumEntopies[0];
         interval_statistics[lastPktTimestamp_s].ip_dst_cum_entropy = ipCumEntopies[1];
     }
-}        
+}
 
 /**
  * Registers statistical data for a sent packet in a given conversation (two IPs, two ports). 
@@ -692,6 +697,14 @@ ip_stats statistics::getStatsForIP(const std::string &ipAddress) {
     return s;
 }
 
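+/**
+ * Gets the default interval length.
+ * @return The default interval length in microseconds.
+ */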
+int statistics::getDefaultInterval() {
+    return this->default_interval;
+}
+
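+/**
+ * Sets the default interval length.
+ * @param interval The default interval length in microseconds.
+ */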
+void statistics::setDefaultInterval(int interval) {
+    this->default_interval = interval;
+}
+
 /**
  * Increments the packet counter.
  */
@@ -736,7 +749,7 @@ void statistics::printStats(const std::string &ipAddress) {
  * writes all data into a SQLite database, located at database_path.
  * @param database_path The path of the SQLite database file ending with .sqlite3.
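+ * @param timeIntervals The interval lengths for which interval statistics tables are written.
+ * @param del Whether previously created interval statistics tables should be dropped.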
  */
-void statistics::writeToDatabase(std::string database_path) {
+void statistics::writeToDatabase(std::string database_path, std::vector<std::chrono::duration<int, std::micro>> timeIntervals, bool del) {
     // Generate general file statistics
     float duration = getCaptureDurationSeconds();
     long sumPacketsSent = 0, senderCountIP = 0;
@@ -774,7 +787,7 @@ void statistics::writeToDatabase(std::string database_path) {
         db.writeStatisticsWin(win_distribution);
         db.writeStatisticsConv(conv_statistics);
         db.writeStatisticsConvExt(conv_statistics_extended);
-        db.writeStatisticsInterval(interval_statistics);
+        db.writeStatisticsInterval(interval_statistics, timeIntervals, del, this->default_interval);
         db.writeDbVersion();
         db.writeStatisticsUnrecognizedPDUs(unrecognized_PDUs);
     }
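
The new pkt_rate and kbyte_rate fields are simply the per-interval counts divided by the interval length in seconds; a worked example:

    interval_us = 2000000                             # 2-second interval
    pkts_count = 1500
    kbytes = 1800.0
    pkt_rate = pkts_count / (interval_us / 1000000)   # 750.0 packets/s
    kbyte_rate = kbytes / (interval_us / 1000000)     # 900.0 kB/s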

+ 17 - 2
code_boost/src/cxx/statistics.h

@@ -236,8 +236,12 @@ struct entry_protocolStat {
  * - # novel MSS
  */
 struct entry_intervalStat {
+    std::string start;
+    std::string end;
     int pkts_count;
+    float pkt_rate;
     float kbytes;
+    float kbyte_rate;
     float ip_src_entropy; 
     float ip_dst_entropy;
     float ip_src_cum_entropy; 
@@ -253,8 +257,12 @@ struct entry_intervalStat {
     int novel_port_count;
 
     bool operator==(const entry_intervalStat &other) const {
-        return pkts_count == other.pkts_count
+        return start == other.start
+               && end == other.end
+               && pkts_count == other.pkts_count
+               && pkt_rate == other.pkt_rate
                && kbytes == other.kbytes
+               && kbyte_rate == other.kbyte_rate
                && ip_src_entropy == other.ip_src_entropy
                && ip_dst_entropy == other.ip_dst_entropy
                && ip_src_cum_entropy == other.ip_src_cum_entropy
@@ -353,6 +361,7 @@ struct entry_convStatExt {
     double avg_time_between_ints;
     double avg_interval_time;
     double total_comm_duration;
+    std::chrono::duration<int, std::micro> timeInterval;
     std::vector<std::chrono::microseconds> pkts_timestamp;
     std::vector<std::chrono::microseconds> interarrival_time;
     std::chrono::microseconds avg_interarrival_time;
@@ -589,7 +598,7 @@ public:
 
     void addMSS(const std::string &ipAddress, int MSSvalue);
 
-    void writeToDatabase(std::string database_path);
+    void writeToDatabase(std::string database_path, std::vector<std::chrono::duration<int, std::micro>> timeInterval, bool del);
 
     void addPacketSize(uint32_t packetSize);
 
@@ -605,6 +614,10 @@ public:
 
     void setDoExtraTests(bool var);
 
+    int getDefaultInterval();
+
+    void setDefaultInterval(int interval);
+
     /*
      * IP Address-specific statistics
      */
@@ -645,6 +658,8 @@ private:
     int intervalCumNovelMSSCount = 0;
     int intervalCumNovelPortCount = 0;
 
+    int default_interval = 0;
+
 
     /*
      * Data containers

+ 106 - 59
code_boost/src/cxx/statistics_db.cpp

@@ -407,7 +407,7 @@ void statistics_db::writeStatisticsConv(std::unordered_map<conv, entry_convStat>
         const char *createTable = "CREATE TABLE conv_statistics ("
                 "ipAddressA TEXT,"
                 "portA INTEGER,"
-                "ipAddressB TEXT,"              
+                "ipAddressB TEXT,"
                 "portB INTEGER,"
                 "pktsCount INTEGER,"
                 "avgPktRate REAL,"
@@ -475,7 +475,7 @@ void statistics_db::writeStatisticsConvExt(std::unordered_map<convWithProt, entr
         const char *createTable = "CREATE TABLE conv_statistics_extended ("
                 "ipAddressA TEXT,"
                 "portA INTEGER,"
-                "ipAddressB TEXT,"              
+                "ipAddressB TEXT,"
                 "portB INTEGER,"
                 "protocol TEXT COLLATE NOCASE,"
                 "pktsCount INTEGER,"
@@ -509,13 +509,13 @@ void statistics_db::writeStatisticsConvExt(std::unordered_map<convWithProt, entr
                 }
                 if (e.interarrival_time.size() > 0)
                     e.avg_interarrival_time = (std::chrono::microseconds) sumDelay / e.interarrival_time.size(); // average
-                else 
+                else
                     e.avg_interarrival_time = (std::chrono::microseconds) 0;
             }
 
-            if (e.total_comm_duration == 0) 
+            if (e.total_comm_duration == 0)
                 e.avg_pkt_rate = e.pkts_count; // pkt per sec
-            else 
+            else
                 e.avg_pkt_rate = e.pkts_count / e.total_comm_duration;
 
             if (e.avg_int_pkts_count > 0){
@@ -551,7 +551,7 @@ void statistics_db::writeStatisticsConvExt(std::unordered_map<convWithProt, entr
 
                 if (PyErr_CheckSignals()) throw py::error_already_set();
             }
-            
+
         }
         transaction.commit();
     }
@@ -564,55 +564,102 @@ void statistics_db::writeStatisticsConvExt(std::unordered_map<convWithProt, entr
  * Writes the interval statistics into the database.
  * @param intervalStatistics The interval entries from class statistics.
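+ * @param timeIntervals The interval lengths for which tables are written.
+ * @param del Whether previously created interval statistics tables should be dropped.
+ * @param defaultInterval The default interval length in microseconds, or 0 if none was set.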
  */
-void statistics_db::writeStatisticsInterval(const std::unordered_map<std::string, entry_intervalStat> &intervalStatistics){
-    try {        
+void statistics_db::writeStatisticsInterval(const std::unordered_map<std::string, entry_intervalStat> &intervalStatistics, std::vector<std::chrono::duration<int, std::micro>> timeIntervals, bool del, int defaultInterval){
+    try {
+        // remove old tables produced by prior database versions
         db->exec("DROP TABLE IF EXISTS interval_statistics");
-        SQLite::Transaction transaction(*db);
-        const char *createTable = "CREATE TABLE interval_statistics ("
-                "lastPktTimestamp TEXT,"
-                "pktsCount INTEGER,"
-                "kBytes REAL,"
-                "ipSrcEntropy REAL,"      
-                "ipDstEntropy REAL,"  
-                "ipSrcCumEntropy REAL,"      
-                "ipDstCumEntropy REAL,"
-                "payloadCount INTEGER,"
-                "incorrectTCPChecksumCount INTEGER,"
-                "correctTCPChecksumCount INTEGER,"
-                "newIPCount INTEGER,"
-                "newPortCount INTEGER,"
-                "newTTLCount INTEGER,"
-                "newWinSizeCount INTEGER,"
-                "newToSCount INTEGER,"
-                "newMSSCount INTEGER,"
-                "PRIMARY KEY(lastPktTimestamp));";
-        db->exec(createTable);
-        SQLite::Statement query(*db, "INSERT INTO interval_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
-        for (auto it = intervalStatistics.begin(); it != intervalStatistics.end(); ++it) {
-            const entry_intervalStat &e = it->second;
-            
-            query.bindNoCopy(1, it->first);
-            query.bind(2, (int)e.pkts_count);
-            query.bind(3, e.kbytes);
-            query.bind(4, e.ip_src_entropy);
-            query.bind(5, e.ip_dst_entropy);
-            query.bind(6, e.ip_src_cum_entropy);
-            query.bind(7, e.ip_dst_cum_entropy);
-            query.bind(8, e.payload_count);
-            query.bind(9, e.incorrect_tcp_checksum_count);
-            query.bind(10, e.correct_tcp_checksum_count);
-            query.bind(11, e.novel_ip_count);
-            query.bind(12, e.novel_port_count);
-            query.bind(13, e.novel_ttl_count);
-            query.bind(14, e.novel_win_size_count);
-            query.bind(15, e.novel_tos_count);
-            query.bind(16, e.novel_mss_count);
-            query.exec();
-            query.reset();
 
-            if (PyErr_CheckSignals()) throw py::error_already_set();
+        if (del) {
+            SQLite::Statement query(*db, "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'interval_statistics_%';");
+            std::vector<std::string> previous_tables;
+            while (query.executeStep()) {
+                previous_tables.push_back(query.getColumn(0));
+            }
+            for (std::string table: previous_tables) {
+                db->exec("DROP TABLE IF EXISTS " + table);
+            }
+            db->exec("DROP TABLE IF EXISTS interval_tables");
+        }
+
+        db->exec("CREATE TABLE IF NOT EXISTS interval_tables (name TEXT, is_default INTEGER);");
+        std::string is_default = "0";
+
+        if (defaultInterval != 0) {
+            is_default = "1";
+            std::chrono::duration<int, std::micro> defaultTimeInterval(defaultInterval);
+            if (timeIntervals.empty()) {
+                timeIntervals.push_back(defaultTimeInterval);
+            }
+        }
+
+        for (auto timeInterval: timeIntervals) {
+            std::string table_name = "interval_statistics_" + std::to_string(timeInterval.count());
+
+            // add interval_tables entry
+            db->exec("DELETE FROM interval_tables WHERE name = '" + table_name + "';");
+            db->exec("INSERT INTO interval_tables VALUES ('" + table_name + "', '" + is_default + "');");
+
+            is_default = "0";
+
+            // new interval statistics implementation
+            db->exec("DROP TABLE IF EXISTS " + table_name);
+            SQLite::Transaction transaction(*db);
+            db->exec("CREATE TABLE " + table_name + " ("
+                    "lastPktTimestamp TEXT,"
+                    "startTimestamp TEXT,"
+                    "endTimestamp TEXT,"
+                    "pktsCount INTEGER,"
+                    "pktRate REAL,"
+                    "kBytes REAL,"
+                    "kByteRate REAL,"
+                    "ipSrcEntropy REAL,"
+                    "ipDstEntropy REAL,"
+                    "ipSrcCumEntropy REAL,"
+                    "ipDstCumEntropy REAL,"
+                    "payloadCount INTEGER,"
+                    "incorrectTCPChecksumCount INTEGER,"
+                    "correctTCPChecksumCount INTEGER,"
+                    "newIPCount INTEGER,"
+                    "newPortCount INTEGER,"
+                    "newTTLCount INTEGER,"
+                    "newWinSizeCount INTEGER,"
+                    "newToSCount INTEGER,"
+                    "newMSSCount INTEGER,"
+                    "PRIMARY KEY(lastPktTimestamp));");
+
+            SQLite::Statement query(*db, "INSERT INTO " + table_name + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
+            for (auto it = intervalStatistics.begin(); it != intervalStatistics.end(); ++it) {
+                const entry_intervalStat &e = it->second;
+
+                query.bindNoCopy(1, it->first);
+                query.bind(2, e.start);
+                query.bind(3, e.end);
+                query.bind(4, (int)e.pkts_count);
+                query.bind(5, e.pkt_rate);
+                query.bind(6, e.kbytes);
+                query.bind(7, e.kbyte_rate);
+                query.bind(8, e.ip_src_entropy);
+                query.bind(9, e.ip_dst_entropy);
+                query.bind(10, e.ip_src_cum_entropy);
+                query.bind(11, e.ip_dst_cum_entropy);
+                query.bind(12, e.payload_count);
+                query.bind(13, e.incorrect_tcp_checksum_count);
+                query.bind(14, e.correct_tcp_checksum_count);
+                query.bind(15, e.novel_ip_count);
+                query.bind(16, e.novel_port_count);
+                query.bind(17, e.novel_ttl_count);
+                query.bind(18, e.novel_win_size_count);
+                query.bind(19, e.novel_tos_count);
+                query.bind(20, e.novel_mss_count);
+                query.exec();
+                query.reset();
+
+                if (PyErr_CheckSignals()) throw py::error_already_set();
+            }
+            transaction.commit();
         }
-        transaction.commit();
     }
     catch (std::exception &e) {
         std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
@@ -620,13 +667,13 @@ void statistics_db::writeStatisticsInterval(const std::unordered_map<std::string
 }
 
 void statistics_db::writeDbVersion(){
-	try {
-		SQLite::Transaction transaction(*db);
-		SQLite::Statement query(*db, std::string("PRAGMA user_version = ") + std::to_string(DB_VERSION) + ";");
-		query.exec();
-		transaction.commit();
-	}
-	catch (std::exception &e) {
+    try {
+        SQLite::Transaction transaction(*db);
+        SQLite::Statement query(*db, std::string("PRAGMA user_version = ") + std::to_string(DB_VERSION) + ";");
+        query.exec();
+        transaction.commit();
+    }
+    catch (std::exception &e) {
         std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
     }
 }
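
Each interval length now gets its own table named after the length in microseconds, and interval_tables records which one is the default; a sketch of the resulting layout for a 2-second interval:

    interval_us = 2000000
    table_name = "interval_statistics_%d" % interval_us  # interval_statistics_2000000
    # interval_tables then contains, e.g.: [("interval_statistics_2000000", 1)]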

+ 2 - 2
code_boost/src/cxx/statistics_db.h

@@ -22,7 +22,7 @@ public:
     /*
      * Database version: Increment number on every change in the C++ code!
      */
-    static const int DB_VERSION = 13;
+    static const int DB_VERSION = 14;
 
     /*
      * Methods for writing values into database
@@ -54,7 +54,7 @@ public:
 
     void writeStatisticsConvExt(std::unordered_map<convWithProt, entry_convStatExt> &conv_statistics_extended);
 
-    void writeStatisticsInterval(const std::unordered_map<std::string, entry_intervalStat> &intervalStatistics);
+    void writeStatisticsInterval(const std::unordered_map<std::string, entry_intervalStat> &intervalStatistics, std::vector<std::chrono::duration<int, std::micro>> timeInterval, bool del, int defaultInterval);
 
     void writeDbVersion();
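
Bumping DB_VERSION to 14 is what invalidates previously cached databases: load_pcap_statistics recalculates whenever get_db_outdated() reports a mismatch. A presumed sketch of that check, for illustration only (the actual comparison lives in StatsDatabase and is not part of this diff):

    # get_db_version() is exported by pcap_processor (see pcap_processor.h above);
    # stored_user_version would come from PRAGMA user_version.
    def db_outdated(stored_user_version: int) -> bool:
        return stored_user_version != pcap_processor.get_db_version()  # now 14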