Browse Source

Merge two statistics tables

Merge the two statistics tables conv_statistics_stateless
and comm_interval_statistics
dustin.born 6 years ago
parent
commit
2e31e8adad

+ 11 - 11
code/ID2TLib/Statistics.py

@@ -553,9 +553,9 @@ class Statistics:
         """
 
         in_degree_raw = self.stats_db._process_user_defined_query(
-                "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics_stateless ON ipAddress = ipAddressA WHERE portDirection=\'in\' AND portNumber = portA GROUP BY ipAddress " +
+                "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics_extended ON ipAddress = ipAddressA WHERE portDirection=\'in\' AND portNumber = portA GROUP BY ipAddress " +
                 "UNION " +
-                "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics_stateless ON ipAddress = ipAddressB WHERE portDirection=\'in\' AND portNumber = portB GROUP BY ipAddress")
+                "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics_extended ON ipAddress = ipAddressB WHERE portDirection=\'in\' AND portNumber = portB GROUP BY ipAddress")
 
         #Because of the structure of the database, there could be 2 entries for the same IP Address, therefore accumulate their sums
         in_degree = self.filter_multiples(in_degree_raw)
@@ -569,9 +569,9 @@ class Statistics:
         """
         
         out_degree_raw = self.stats_db._process_user_defined_query(
-                "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics_stateless ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA GROUP BY ipAddress " +
+                "SELECT ipAddressA, Count(DISTINCT ipAddressB) FROM ip_ports JOIN conv_statistics_extended ON ipAddress = ipAddressA WHERE portDirection=\'out\' AND portNumber = portA GROUP BY ipAddress " +
                 "UNION " +
-                "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics_stateless ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB GROUP BY ipAddress")
+                "SELECT ipAddressB, Count(DISTINCT ipAddressA) FROM ip_ports JOIN conv_statistics_extended ON ipAddress = ipAddressB WHERE portDirection=\'out\' AND portNumber = portB GROUP BY ipAddress")
 
         #Because of the structure of the database, there could be 2 entries for the same IP Address, therefore accumulate their sums
         out_degree = self.filter_multiples(out_degree_raw)
@@ -1237,7 +1237,7 @@ class Statistics:
             else:
                 print("Error: No statistics Information for plotting overall-degrees found")
 
-        def plot_big_comm_interval_stat(attr:str, table:str, title:str, xlabel:str, suffix:str):
+        def plot_big_conv_ext_stat(attr:str, title:str, xlabel:str, suffix:str):
             """
             Plots the desired statistc per connection as horizontal bar plot. 
             Included are 'half-open' connections, where only one packet is exchanged.
@@ -1254,7 +1254,7 @@ class Statistics:
             """
             plt.gcf().clear()
             result = self.stats_db._process_user_defined_query(
-                "SELECT ipAddressA, portA, ipAddressB, portB, %s FROM %s" % (attr, table))
+                "SELECT ipAddressA, portA, ipAddressB, portB, %s FROM conv_statistics_extended" % attr)
 
             if (result):
                 graphy, graphx = [], []
@@ -1336,7 +1336,7 @@ class Statistics:
             suffix = '_plot-PktCount per Connection Distribution' + file_ending
 
             # plot data and return outpath
-            return plot_big_comm_interval_stat("pktsCount", "conv_statistics_stateless", title, "Number of packets", suffix)
+            return plot_big_conv_ext_stat("pktsCount", title, "Number of packets", suffix)
 
         def plot_avg_pkts_per_comm_interval(file_ending: str):
             """
@@ -1350,7 +1350,7 @@ class Statistics:
             suffix = '_plot-Avg PktCount Communication Interval Distribution' + file_ending
 
             # plot data and return outpath
-            return plot_big_comm_interval_stat("avgPktCount", "comm_interval_statistics" ,title, "Number of packets", suffix)
+            return plot_big_conv_ext_stat("avgIntervalPktCount", title, "Number of packets", suffix)
 
         def plot_avg_time_between_comm_interval(file_ending: str):
             """
@@ -1364,7 +1364,7 @@ class Statistics:
             suffix = '_plot-Avg Time Between Communication Intervals Distribution' + file_ending
 
             # plot data and return outpath
-            return plot_big_comm_interval_stat("avgTimeBetweenIntervals", "comm_interval_statistics", title, 'Average time between intervals', suffix)
+            return plot_big_conv_ext_stat("avgTimeBetweenIntervals", title, 'Average time between intervals', suffix)
 
         def plot_avg_comm_interval_time(file_ending: str):
             """
@@ -1378,7 +1378,7 @@ class Statistics:
             suffix = '_plot-Avg Duration Communication Interval Distribution' + file_ending
 
             # plot data and return outpath
-            return plot_big_comm_interval_stat("avgIntervalTime", "comm_interval_statistics", title, 'Average interval time', suffix)
+            return plot_big_conv_ext_stat("avgIntervalTime", title, 'Average interval time', suffix)
 
         def plot_total_comm_duration(file_ending: str):
             """
@@ -1392,7 +1392,7 @@ class Statistics:
             suffix = '_plot-Total Communication Duration Distribution' + file_ending
 
             # plot data and return outpath
-            return plot_big_comm_interval_stat("totalCommDuration", "comm_interval_statistics", title, 'Duration', suffix)
+            return plot_big_conv_ext_stat("totalConversationDuration", title, 'Duration', suffix)
 
 
         ttl_out_path = plot_ttl('.' + format)

+ 4 - 9
code_boost/src/cxx/pcap_processor.cpp

@@ -159,7 +159,7 @@ void pcap_processor::collect_statistics() {
         // Save timestamp of last packet into statistics
         stats.setTimestampLastPacket(currentPktTimestamp);
 
-        // Create the communication interval statistics from all gathered communication intervals
+        // Create the communication interval statistics from the gathered communication intervals within every extended conversation statistic
         stats.createCommIntervalStats();
     }
 }
@@ -256,10 +256,7 @@ void pcap_processor::process_packets(const Packet &pkt) {
 
             // Conversation statistics
             stats.addConvStat(ipAddressSender, tcpPkt.sport(), ipAddressReceiver, tcpPkt.dport(), pkt.timestamp());
-            stats.addConvStatStateless(ipAddressSender, tcpPkt.sport(), ipAddressReceiver, tcpPkt.dport(), pkt.timestamp()); 
-
-            // Communication interval data collection for the later created communication statistics
-            stats.addCommInterval(ipAddressSender, tcpPkt.sport(), ipAddressReceiver, tcpPkt.dport(), pkt.timestamp());
+            stats.addConvStatExt(ipAddressSender,tcpPkt.sport(), ipAddressReceiver, tcpPkt.dport(), "TCP", pkt.timestamp());
 
             // Window Size distribution
             int win = tcpPkt.window();
@@ -281,12 +278,10 @@ void pcap_processor::process_packets(const Packet &pkt) {
             stats.incrementProtocolCount(ipAddressSender, "UDP");   
 
             // Conversation statistics
-            stats.addConvStatStateless(ipAddressSender, udpPkt.sport(), ipAddressReceiver, udpPkt.dport(), pkt.timestamp());           
+            stats.addConvStatExt(ipAddressSender,udpPkt.sport(), ipAddressReceiver, udpPkt.dport(), "UDP", pkt.timestamp());
+
             stats.incrementPortCount(ipAddressSender, udpPkt.sport(), ipAddressReceiver, udpPkt.dport());      
 
-            // Communication interval data collection for the later created communication statistics
-            stats.addCommInterval(ipAddressSender, udpPkt.sport(), ipAddressReceiver, udpPkt.dport(), pkt.timestamp());       
-          
         } else if (p == PDU::PDUType::ICMP) {
             stats.incrementProtocolCount(ipAddressSender, "ICMP");
         } else if (p == PDU::PDUType::ICMPv6) {

+ 43 - 68
code_boost/src/cxx/statistics.cpp

@@ -248,101 +248,78 @@ void statistics::addConvStat(std::string ipAddressSender,int sport,std::string i
 }
 
 /**
- * Registers statistical data for a sent packet in a given stateless conversation (two IPs, two ports). 
+ * Registers statistical data for a sent packet in a given extended conversation (two IPs, two ports, protocol). 
  * Increments the counter packets_A_B or packets_B_A.
  * Adds the timestamp of the packet in pkts_A_B_timestamp or pkts_B_A_timestamp.
+ * Updates all other statistics of conv_statistics_extended
  * @param ipAddressSender The sender IP address.
  * @param sport The source port.
  * @param ipAddressReceiver The receiver IP address.
  * @param dport The destination port.
+ * @param protocol The used protocol.
  * @param timestamp The timestamp of the packet.
  */
-void statistics::addConvStatStateless(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp){
+void statistics::addConvStatExt(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport,std::string protocol, std::chrono::microseconds timestamp){
+    convWithProt f1 = {ipAddressReceiver, dport, ipAddressSender, sport, protocol};
+    convWithProt f2 = {ipAddressSender, sport, ipAddressReceiver, dport, protocol};
+    convWithProt f;
 
-    conv f1 = {ipAddressReceiver, dport, ipAddressSender, sport};
-    conv f2 = {ipAddressSender, sport, ipAddressReceiver, dport};
-
-    // if already exist A(ipAddressReceiver, dport), B(ipAddressSender, sport) conversation
-    if (conv_statistics_stateless.count(f1)>0){
-        conv_statistics_stateless[f1].pkts_count++;
-        if(conv_statistics_stateless[f1].pkts_count<=3)
-            conv_statistics_stateless[f1].interarrival_time.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - conv_statistics_stateless[f1].pkts_timestamp.back()));
-        conv_statistics_stateless[f1].pkts_timestamp.push_back(timestamp);
-    }
-    // Add new conversation A(ipAddressSender, sport), B(ipAddressReceiver, dport)
-    else{
-        conv_statistics_stateless[f2].pkts_count++;
-        if(conv_statistics_stateless[f2].pkts_timestamp.size()>0 && conv_statistics_stateless[f2].pkts_count<=3 )
-            conv_statistics_stateless[f2].interarrival_time.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - conv_statistics_stateless[f2].pkts_timestamp.back()));
-        conv_statistics_stateless[f2].pkts_timestamp.push_back(timestamp);
-    }
-}
+    // if there already exists a communication interval for the specified conversation
+    if (conv_statistics_extended.count(f1) > 0 || conv_statistics_extended.count(f2) > 0){
 
-/**
- * Adds the passed information to the relevant communication intervals of the respective conversation.
- * If the time between the last message of the latest interval and the timestamp of the current message exceeds
- * the threshold, a new interval is created.
- * Note: here and within the function, conversation refers to a stateless conversation.
- * @param ipAddressSender The sender IP address.
- * @param sport The source port.
- * @param ipAddressReceiver The receiver IP address.
- * @param dport The destination port.
- * @param timestamp The timestamp of the packet.
- */
-
-void statistics::addCommInterval(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp){
-    conv f1 = {ipAddressReceiver, dport, ipAddressSender, sport};
-    conv f2 = {ipAddressSender, sport, ipAddressReceiver, dport};
-    conv f;
-
-    // if there already exists a communication interval for the specified conversation ...
-    if (comm_intervals.count(f1) > 0 || comm_intervals.count(f2) > 0){
-
-        // find out which direction of conversation is contained in comm_intervals
-        if (comm_intervals.count(f1) > 0)
+        // find out which direction of conversation is contained in conv_statistics_extended
+        if (conv_statistics_extended.count(f1) > 0)
             f = f1;
         else
             f = f2;
 
-        // if the time difference is exceeded, create a new interval with this message
-        if (timestamp - comm_intervals[f].back().end > (std::chrono::microseconds) ((unsigned long) COMM_INTERVAL_THRESHOLD)) {  // > or >= ?
+        // increase pkts count and check on delay
+        conv_statistics_extended[f].pkts_count++;
+        if (conv_statistics_extended[f].pkts_timestamp.size()>0 && conv_statistics_extended[f].pkts_count<=3)
+            conv_statistics_extended[f].interarrival_time.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - conv_statistics_extended[f].pkts_timestamp.back()));
+        conv_statistics_extended[f].pkts_timestamp.push_back(timestamp);
+
+        // if the time difference has exceeded the threshold, create a new interval with this message
+        if (timestamp - conv_statistics_extended[f].comm_intervals.back().end > (std::chrono::microseconds) ((unsigned long) COMM_INTERVAL_THRESHOLD)) {  // > or >= ?
             commInterval new_interval = {timestamp, timestamp, 1};
-            comm_intervals[f].push_back(new_interval);
+            conv_statistics_extended[f].comm_intervals.push_back(new_interval);
         }  
         // otherwise, set the time of the last interval message to the current timestamp and increase interval packet count by 1
         else{
-            comm_intervals[f].back().end = timestamp;
-            comm_intervals[f].back().pkts_count++;
+            conv_statistics_extended[f].comm_intervals.back().end = timestamp;
+            conv_statistics_extended[f].comm_intervals.back().pkts_count++;
         }
     }
-    // if there does not exist a communication interval for the specified conversation ...
+    // if there does not exist a communication interval for the specified conversation
     else{
-        // add initial interval for this conversation
+        // add initial interval entry for this conversation
         commInterval initial_interval = {timestamp, timestamp, 1};
 
-        std::vector<commInterval> intervals;
-        intervals.push_back(initial_interval);
-        comm_intervals[f1] = intervals;
+        entry_convStatExt entry;
+        entry.comm_intervals.push_back(initial_interval);
+        entry.pkts_count = 1;
+        entry.pkts_timestamp.push_back(timestamp);
+        conv_statistics_extended[f2] = entry;
     }
 }
 
 /**
- * Aggregate the collected information about all communication intervals of every conversation.
+ * Aggregate the collected information about all communication intervals within conv_statistics_extended of every conversation.
  * Do this by computing the average packet rate per interval and the average time between intervals.
  * Also compute average interval duration and total communication duration (i.e. last_msg.time - first_msg.time)
- * Note: here and within the function, conversation refers to a stateless conversation.
  */
 void statistics::createCommIntervalStats(){    
-    // iterate over all <conv, conv_intervals> pairs
-    for (auto &cur_elem : comm_intervals) {
-        conv cur_conv = cur_elem.first;
-        std::vector<commInterval> intervals = cur_elem.second;
+    // iterate over all <convWithProt, entry_convStatExt> pairs
+    for (auto &cur_elem : conv_statistics_extended) {
+        entry_convStatExt &entry = cur_elem.second;
+        std::vector<commInterval> &intervals = entry.comm_intervals;
 
         // if there is only one interval, the time between intervals cannot be computed and is therefore set to 0
         if (intervals.size() == 1){
             double interval_duration = (double) (intervals[0].end - intervals[0].start).count() / (double) 1e6;
-            entry_commIntervalStat e = {(double) intervals[0].pkts_count, (double) 0, interval_duration, interval_duration};
-            comm_interval_statistics[cur_conv] = e;
+            entry.avg_int_pkts_count = (double) intervals[0].pkts_count;
+            entry.avg_time_between_ints = (double) 0;
+            entry.avg_interval_time = interval_duration;
         }
         // If there is more than one interval, compute the specified averages
         else if (intervals.size() > 1){
@@ -356,13 +333,12 @@ void statistics::createCommIntervalStats(){
                 time_between_ints_sum += intervals[i].start - intervals[i - 1].end;
             }
 
-            double avg_pkts_count = summed_pkts_count / ((double) intervals.size());
-            double avg_time_betw_ints = (time_between_ints_sum.count() / (double) (intervals.size() - 1)) / (double) 1e6;
-            double avg_interval_time = (summed_int_duration.count() / (double) intervals.size()) / (double) 1e6;
-            double total_comm_duration = (double) (intervals.back().end - intervals.front().start).count() / (double) 1e6;
-            entry_commIntervalStat e = {avg_pkts_count, avg_time_betw_ints, avg_interval_time, total_comm_duration};
-            comm_interval_statistics[cur_conv] = e;
+            entry.avg_int_pkts_count = summed_pkts_count / ((double) intervals.size());
+            entry.avg_time_between_ints = (time_between_ints_sum.count() / (double) (intervals.size() - 1)) / (double) 1e6;
+            entry.avg_interval_time = (summed_int_duration.count() / (double) intervals.size()) / (double) 1e6;
+
         }
+        entry.total_comm_duration = (double) (entry.pkts_timestamp.back() - entry.pkts_timestamp.front()).count() / (double) 1e6;
     }
 }
 
@@ -720,9 +696,8 @@ void statistics::writeToDatabase(std::string database_path) {
         db.writeStatisticsToS(tos_distribution);
         db.writeStatisticsWin(win_distribution);
         db.writeStatisticsConv(conv_statistics);
-        db.writeStatisticsConvStateless(conv_statistics_stateless);
+        db.writeStatisticsConvExt(conv_statistics_extended);
         db.writeStatisticsInterval(interval_statistics);
-        db.writeCommIntervalStats(comm_interval_statistics);
     }
     else {
         // Tinslib failed to recognize the types of the packets in the input PCAP

+ 72 - 27
code_boost/src/cxx/statistics.h

@@ -62,6 +62,30 @@ struct conv{
     }    
 };
 
+/*
+ * Struct used to represent a conversation by:
+ * - IP address A
+ * - Port A
+ * - IP address B
+ * - Port B
+ * - Protocol
+ */
+struct convWithProt{
+    std::string ipAddressA;
+    int portA;
+    std::string ipAddressB;
+    int portB;
+    std::string protocol;
+
+    bool operator==(const convWithProt &other) const {
+        return ipAddressA == other.ipAddressA
+               && portA == other.portA
+               &&ipAddressB == other.ipAddressB
+               && portB == other.portB
+               && protocol == other.protocol;
+    }    
+};
+
 /*
  * Struct used to represent:
  * - IP address (IPv4 or IPv6)
@@ -284,28 +308,44 @@ struct commInterval{
 };
 
 /*
- * Struct used to represent for the communication intervals of two hosts:
+ * Struct used to represent converstaion statistics:
+ * - commnication intervals
+ * - # packets
+ * - Average packet rate
+ * - average # packets per communication interval
  * - Average time between intervals
- * - The average count of packets within an interval
- * - The average duration of a communication interval
- * - The overall communication time, i.e. last_msg.time - first_msg.time 
- * Note: total_comm_duration != sum of all interval durations
+ * - Average duration of a communication interval
+ * - Overall communication duration
+ * - Timestamps of packets
+ * - Inter-arrival time
+ * - Average inter-arrival time
  */
-struct entry_commIntervalStat{
-    double avg_pkts_count;
-    double avg_time_between;
+struct entry_convStatExt {
+    std::vector<commInterval> comm_intervals;
+    long pkts_count;
+    float avg_pkt_rate;
+    double avg_int_pkts_count;
+    double avg_time_between_ints;
     double avg_interval_time;
     double total_comm_duration;
+    std::vector<std::chrono::microseconds> pkts_timestamp;
+    std::vector<std::chrono::microseconds> interarrival_time;
+    std::chrono::microseconds avg_interarrival_time;
 
-    bool operator==(const entry_commIntervalStat &other) const {
-        return avg_pkts_count == other.avg_pkts_count
-               && avg_time_between == other.avg_time_between
+    bool operator==(const entry_convStatExt &other) const {
+        return comm_intervals == other.comm_intervals
+               && pkts_count == other.pkts_count
+               && avg_pkt_rate == avg_pkt_rate
+               && avg_int_pkts_count == other.avg_int_pkts_count
+               && avg_time_between_ints == other.avg_time_between_ints
                && avg_interval_time == other.avg_interval_time
-               && total_comm_duration == other.total_comm_duration;
-    }    
+               && total_comm_duration == other.total_comm_duration
+               && pkts_timestamp == other.pkts_timestamp
+               && interarrival_time == other.interarrival_time
+               && avg_interarrival_time == other.avg_interarrival_time;
+    }
 };
 
-
 /*
  * Definition of hash functions for structs used as key in unordered_map
  */
@@ -366,6 +406,20 @@ namespace std {
                      ^ (hash<int>()(k.portB) << 1)) >> 1);
         }
     };
+
+    template<>
+    struct hash<convWithProt> {
+        std::size_t operator()(const convWithProt &c) const {
+            using std::size_t;
+            using std::hash;
+            using std::string;
+            return ((hash<string>()(c.ipAddressA)
+                     ^ (hash<int>()(c.portA) << 1)) >> 1)
+                     ^ ((hash<string>()(c.ipAddressB)
+                     ^ (hash<int>()(c.portB) << 1)) >> 1)
+                     ^ (hash<string>()(c.protocol));
+        }
+    };
     
     template<>
     struct hash<ipAddress_protocol> {
@@ -416,9 +470,7 @@ public:
 
     void addConvStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
 
-    void addConvStatStateless(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
-
-    void addCommInterval(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
+    void addConvStatExt(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport,std::string protocol, std::chrono::microseconds timestamp);
 
     void createCommIntervalStats();
 
@@ -537,10 +589,10 @@ private:
     // average of inter-arrival times}
     std::unordered_map<conv, entry_convStat> conv_statistics;
 
-    // {IP Address A, Port A, IP Address B, Port B,   #packets, packets timestamps, inter-arrival times,
-    // average of inter-arrival times}
+    // {IP Address A, Port A, IP Address B, Port B,   comm_intervals, #packets, avg. pkt rate, avg. #packets per interval,
+    // avg. time between intervals, avg. interval time, duration, packets timestamps, inter-arrivtal times, average of inter-arrival times}
     // Also stores conversation with only one exchanged message. In this case avgPktRate, minDelay, maxDelay and avgDelay are -1
-    std::unordered_map<conv, entry_convStat> conv_statistics_stateless;
+    std::unordered_map<convWithProt, entry_convStatExt> conv_statistics_extended;
 
     // {Last timestamp in the interval, #packets, #bytes, source IP entropy, destination IP entropy,
     // source IP cumulative entropy, destination IP cumulative entropy, #payload, #incorrect TCP checksum,
@@ -573,13 +625,6 @@ private:
 
     // {IP Address, MAC Address}
     std::unordered_map<std::string, std::string> ip_mac_mapping;
-
-    // {IP Address A, Port A, IP Address B, Port B, listof(commInterval)}
-    // Used to manage all communication intervals for a pair of communicating hosts
-    std::unordered_map<conv, std::vector<commInterval> > comm_intervals;
-
-    // {IP Address A, Port A, IP Address B, Port B, avg #packets, avg time between intervals}
-    std::unordered_map<conv, entry_commIntervalStat> comm_interval_statistics;
 };
 
 

+ 68 - 98
code_boost/src/cxx/statistics_db.cpp

@@ -380,80 +380,98 @@ void statistics_db::writeStatisticsConv(std::unordered_map<conv, entry_convStat>
     }
 }
 
-
 /**
- * Writes the stateless conversation statistics into the database.
- * @param convStatistics The stateless conversation from class statistics.
+ * Writes the extended statistics for every conversation into the database.
+ * @param conv_statistics_extended The extended conversation statistics from class statistics.
  */
-void statistics_db::writeStatisticsConvStateless(std::unordered_map<conv, entry_convStat> convStatistics){          
+void statistics_db::writeStatisticsConvExt(std::unordered_map<convWithProt, entry_convStatExt> conv_statistics_extended){
     try {
-        db->exec("DROP TABLE IF EXISTS conv_statistics_stateless");
+        db->exec("DROP TABLE IF EXISTS conv_statistics_extended");
         SQLite::Transaction transaction(*db);
-        const char *createTable = "CREATE TABLE conv_statistics_stateless ("
+        const char *createTable = "CREATE TABLE conv_statistics_extended ("
                 "ipAddressA TEXT,"
                 "portA INTEGER,"
                 "ipAddressB TEXT,"              
                 "portB INTEGER,"
+                "protocol TEXT,"
                 "pktsCount INTEGER,"
                 "avgPktRate REAL,"
                 "avgDelay INTEGER,"
                 "minDelay INTEGER,"
                 "maxDelay INTEGER,"
-                "PRIMARY KEY(ipAddressA,portA,ipAddressB,portB));";
+                "avgIntervalPktCount REAL,"
+                "avgTimeBetweenIntervals REAL,"
+                "avgIntervalTime REAL,"
+                "totalConversationDuration REAL,"
+                "PRIMARY KEY(ipAddressA,portA,ipAddressB,portB,protocol));";
         db->exec(createTable);
-        SQLite::Statement query(*db, "INSERT INTO conv_statistics_stateless VALUES (?, ?, ?, ?, ?,  ?, ?, ?, ?)");
+        SQLite::Statement query(*db, "INSERT INTO conv_statistics_extended VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
+        // iterate over every conversation and interval aggregation pair and store the respective values in the database
+        for (auto it = conv_statistics_extended.begin(); it != conv_statistics_extended.end(); ++it) {
+            convWithProt f = it->first;
+            entry_convStatExt e = it->second;
 
-        // Calculate average of inter-arrival times and average packet rate
-        for (auto it = convStatistics.begin(); it != convStatistics.end(); ++it) {
-            conv f = it->first;
-            entry_convStat e = it->second;
-            if (e.pkts_count > 0){
+            int sumDelay = 0;
+            int minDelay = -1;
+            int maxDelay = -1;
+
+            if (e.pkts_count > 1 && f.protocol == "TCP"){
+                for (int i = 0; (unsigned) i < e.interarrival_time.size(); i++) {
+                    sumDelay += e.interarrival_time[i].count();
+                    if (maxDelay < e.interarrival_time[i].count())
+                        maxDelay = e.interarrival_time[i].count();
+                    if (minDelay > e.interarrival_time[i].count() || minDelay == -1)
+                        minDelay = e.interarrival_time[i].count();
+                }
+                if (e.interarrival_time.size() > 0)
+                    e.avg_interarrival_time = (std::chrono::microseconds) sumDelay / e.interarrival_time.size(); // average
+                else 
+                    e.avg_interarrival_time = (std::chrono::microseconds) 0;
+            }
+
+            if (e.total_comm_duration == 0) 
+                e.avg_pkt_rate = e.pkts_count; // pkt per sec
+            else 
+                e.avg_pkt_rate = e.pkts_count / e.total_comm_duration;
+
+            if (e.avg_int_pkts_count > 0){
                 query.bind(1, f.ipAddressA);
                 query.bind(2, f.portA);
                 query.bind(3, f.ipAddressB);
                 query.bind(4, f.portB);
+                query.bind(5, f.protocol);
+                query.bind(6, (int) e.pkts_count);
+                query.bind(7, (float) e.avg_pkt_rate);
+                query.bind(8, (int) e.avg_interarrival_time.count());
+                query.bind(9, minDelay);
+                query.bind(10, maxDelay);
+                query.bind(11, e.avg_int_pkts_count);
+                query.bind(12, e.avg_time_between_ints);
+                query.bind(13, e.avg_interval_time);
+                query.bind(14, e.total_comm_duration);
+                query.exec();
 
-                if (e.pkts_count == 1){
-                    e.avg_pkt_rate = (float) -1;
-                    e.avg_interarrival_time = (std::chrono::microseconds) -1;
+                std::string primary_where = "WHERE ipAddressA=\"" + f.ipAddressA + "\" AND portA=" + std::to_string(f.portA) + " AND ipAddressB=\"";
+                primary_where += f.ipAddressB + "\" AND portB=" + std::to_string(f.portB) + " AND protocol=\"" + f.protocol + "\";";
+                std::string update_stmt;
 
-                    query.bind(5, (int) e.pkts_count);
-                    query.bind(6, (float) e.avg_pkt_rate);
-                    query.bind(7, (int) e.avg_interarrival_time.count());
-                    query.bind(8, -1);
-                    query.bind(9, -1);
-                    query.exec();
-                    query.reset();
+                // replace -1 with null
+                if (minDelay == -1){
+                    update_stmt = "UPDATE conv_statistics_extended SET minDelay=NULL " + primary_where;
+                    db->exec(update_stmt);
                 }
-                else {
-                    int sumDelay = 0;
-                    int minDelay = -1;
-                    int maxDelay = -1;
-                    for (int i = 0; (unsigned) i < e.interarrival_time.size(); i++) {
-                        sumDelay += e.interarrival_time[i].count();
-                        if (maxDelay < e.interarrival_time[i].count())
-                            maxDelay = e.interarrival_time[i].count();
-                        if (minDelay > e.interarrival_time[i].count() || minDelay == -1)
-                            minDelay = e.interarrival_time[i].count();
-                    }
-                    if (e.interarrival_time.size() > 0)
-                        e.avg_interarrival_time = (std::chrono::microseconds) sumDelay / e.interarrival_time.size(); // average
-                    else e.avg_interarrival_time = (std::chrono::microseconds) 0;
 
-                    std::chrono::microseconds start_timesttamp = e.pkts_timestamp[0];
-                    std::chrono::microseconds end_timesttamp = e.pkts_timestamp.back();
-                    std::chrono::microseconds conn_duration = end_timesttamp - start_timesttamp;
-                    e.avg_pkt_rate = (float) e.pkts_count * 1000000 / conn_duration.count(); // pkt per sec
+                if (maxDelay == -1){
+                    update_stmt = "UPDATE conv_statistics_extended SET maxDelay=NULL " + primary_where;
+                    db->exec(update_stmt);
+                }
 
-                    
-                    query.bind(5, (int) e.pkts_count);
-                    query.bind(6, (float) e.avg_pkt_rate);
-                    query.bind(7, (int) e.avg_interarrival_time.count());
-                    query.bind(8, minDelay);
-                    query.bind(9, maxDelay);
-                    query.exec();
-                    query.reset();
+                if (f.protocol == "UDP" || (f.protocol == "TCP" && e.pkts_count < 2)){
+                    update_stmt = "UPDATE conv_statistics_extended SET avgDelay=NULL " + primary_where;
+                    db->exec(update_stmt);
                 }
+
+                query.reset();
             }
             
         }
@@ -464,7 +482,6 @@ void statistics_db::writeStatisticsConvStateless(std::unordered_map<conv, entry_
     }
 }
 
-
 /**
  * Writes the interval statistics into the database.
  * @param intervalStatistics The interval entries from class statistics.
@@ -521,51 +538,4 @@ void statistics_db::writeStatisticsInterval(std::unordered_map<std::string, entr
     catch (std::exception &e) {
         std::cout << "Exception in statistics_db: " << e.what() << std::endl;
     }
-}
-
-/**
- * Writes the communication interval statistics for every conversation into the database.
- * @param commIntervalStatistics The communication interval statistics from class statistics.
- */
-void statistics_db::writeCommIntervalStats(std::unordered_map<conv, entry_commIntervalStat> commIntervalStatistics){
-    try {
-        db->exec("DROP TABLE IF EXISTS comm_interval_statistics");
-        SQLite::Transaction transaction(*db);
-        const char *createTable = "CREATE TABLE comm_interval_statistics ("
-                "ipAddressA TEXT,"
-                "portA INTEGER,"
-                "ipAddressB TEXT,"              
-                "portB INTEGER,"
-                "avgPktCount REAL,"
-                "avgTimeBetweenIntervals REAL,"
-                "avgIntervalTime REAL,"
-                "totalCommDuration REAL,"
-                "PRIMARY KEY(ipAddressA,portA,ipAddressB,portB));";
-        db->exec(createTable);
-        SQLite::Statement query(*db, "INSERT INTO comm_interval_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?)");
-
-        // iterate over every conversation and interval aggregation pair and store the respective values in the database
-        for (auto it = commIntervalStatistics.begin(); it != commIntervalStatistics.end(); ++it) {
-            conv f = it->first;
-            entry_commIntervalStat e = it->second;
-            if (e.avg_pkts_count > 0){
-                query.bind(1, f.ipAddressA);
-                query.bind(2, f.portA);
-                query.bind(3, f.ipAddressB);
-                query.bind(4, f.portB);
-                query.bind(5, e.avg_pkts_count);
-                query.bind(6, e.avg_time_between);
-                query.bind(7, e.avg_interval_time);
-                query.bind(8, e.total_comm_duration);
-                
-                query.exec();
-                query.reset();
-            }
-            
-        }
-        transaction.commit();
-    }
-    catch (std::exception &e) {
-        std::cout << "Exception in statistics_db: " << e.what() << std::endl;
-    }
-}
+}

+ 1 - 3
code_boost/src/cxx/statistics_db.h

@@ -43,12 +43,10 @@ public:
 
     void writeStatisticsConv(std::unordered_map<conv, entry_convStat> convStatistics);
 
-    void writeStatisticsConvStateless(std::unordered_map<conv, entry_convStat> convStatistics);
+    void writeStatisticsConvExt(std::unordered_map<convWithProt, entry_convStatExt> conv_statistics_extended);
 
     void writeStatisticsInterval(std::unordered_map<std::string, entry_intervalStat> intervalStatistics);
 
-    void writeCommIntervalStats(std::unordered_map<conv, entry_commIntervalStat> commIntervalStatistics);
-
 private:
     // Pointer to the SQLite database
     std::unique_ptr<SQLite::Database> db;