Forráskód Böngészése

Add statistics retrieval for communication intervals

Adds statistics of communication intervals for every UDP or TCP
conversation. Per conversation are included: average count of packets
per communication interval and average time between communication
intervals. The threshold at which two messages of the same conversation
are considered to be part of different intervals is set to 10s (initially).
Therefore, all messages of the same conversation that are apart more than
10s are considered part of different communication intervals.

Note: Since C++ code was modified, a rebuild is required.
dustin.born 6 éve
szülő
commit
4652e96777

+ 0 - 1
code/ID2TLib/Controller.py

@@ -112,7 +112,6 @@ class Controller:
         pcap_base = os.path.splitext(self.pcap_dest_path)[0]
         created_files = [self.pcap_dest_path, self.label_manager.label_file_path]
         for suffix, filename in context.get_allocated_files():
-            print(filename, pcap_base + suffix)
             shutil.move(filename, pcap_base + suffix)
             created_files.append(pcap_base + suffix)
         context.reset()

+ 15 - 5
code_boost/src/cxx/pcap_processor.cpp

@@ -158,6 +158,9 @@ void pcap_processor::collect_statistics() {
         
         // Save timestamp of last packet into statistics
         stats.setTimestampLastPacket(currentPktTimestamp);
+
+        // Create the communication interval statistics from all gathered communication intervals
+        stats.createCommIntervalStats();
     }
 }
 
@@ -244,10 +247,10 @@ void pcap_processor::process_packets(const Packet &pkt) {
         if (p == PDU::PDUType::TCP) {
             TCP tcpPkt = (const TCP &) *pdu_l4;
             
-          // Check TCP checksum
-          if (pdu_l3_type == PDU::PDUType::IP) {
-            stats.checkTCPChecksum(ipAddressSender, ipAddressReceiver, tcpPkt);
-          }
+            // Check TCP checksum
+            if (pdu_l3_type == PDU::PDUType::IP) {
+              stats.checkTCPChecksum(ipAddressSender, ipAddressReceiver, tcpPkt);
+            }
 
             stats.incrementProtocolCount(ipAddressSender, "TCP");                        
 
@@ -255,6 +258,9 @@ void pcap_processor::process_packets(const Packet &pkt) {
             stats.addConvStat(ipAddressSender, tcpPkt.sport(), ipAddressReceiver, tcpPkt.dport(), pkt.timestamp());
             stats.addConvStatStateless(ipAddressSender, tcpPkt.sport(), ipAddressReceiver, tcpPkt.dport(), pkt.timestamp()); 
 
+            // Communication interval data collection for the later created communication statistics
+            stats.addCommInterval(ipAddressSender, tcpPkt.sport(), ipAddressReceiver, tcpPkt.dport(), pkt.timestamp());
+
             // Window Size distribution
             int win = tcpPkt.window();
             stats.incrementWinCount(ipAddressSender, win);
@@ -273,9 +279,13 @@ void pcap_processor::process_packets(const Packet &pkt) {
         } else if (p == PDU::PDUType::UDP) {
             const UDP udpPkt = (const UDP &) *pdu_l4;
             stats.incrementProtocolCount(ipAddressSender, "UDP");   
+
             // Conversation statistics
             stats.addConvStatStateless(ipAddressSender, udpPkt.sport(), ipAddressReceiver, udpPkt.dport(), pkt.timestamp());           
-            stats.incrementPortCount(ipAddressSender, udpPkt.sport(), ipAddressReceiver, udpPkt.dport());             
+            stats.incrementPortCount(ipAddressSender, udpPkt.sport(), ipAddressReceiver, udpPkt.dport());      
+
+            // Communication interval data collection for the later created communication statistics
+            stats.addCommInterval(ipAddressSender, udpPkt.sport(), ipAddressReceiver, udpPkt.dport(), pkt.timestamp());       
           
         } else if (p == PDU::PDUType::ICMP) {
             stats.incrementProtocolCount(ipAddressSender, "ICMP");

+ 82 - 39
code_boost/src/cxx/statistics.cpp

@@ -278,6 +278,87 @@ void statistics::addConvStatStateless(std::string ipAddressSender,int sport,std:
     }
 }
 
+/**
+ * Adds the passed information to the relevant communication intervals of the respective conversation.
+ * If the time between the last message of the latest interval and the timestamp of the current message exceeds
+ * the threshold, a new interval is created.
+ * Note: here and within the function, conversation refers to a stateless conversation.
+ * @param ipAddressSender The sender IP address.
+ * @param sport The source port.
+ * @param ipAddressReceiver The receiver IP address.
+ * @param dport The destination port.
+ * @param timestamp The timestamp of the packet.
+ */
+void statistics::addCommInterval(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp){
+    conv f1 = {ipAddressReceiver, dport, ipAddressSender, sport};
+    conv f2 = {ipAddressSender, sport, ipAddressReceiver, dport};
+    conv f;
+
+    // if there already exists a communication interval for the specified conversation ...
+    if (comm_intervals.count(f1) > 0 || comm_intervals.count(f2) > 0){
+
+        // find out which direction of conversation is contained in comm_intervals
+        if (comm_intervals.count(f1) > 0)
+            f = f1;
+        else
+            f = f2;
+
+        // if the time difference is exceeded, create a new interval with this message
+        if (timestamp - comm_intervals[f].back().end > (std::chrono::microseconds) ((unsigned long) COMM_INTERVAL_THRESHOLD)) {  // > or >= ?
+            commInterval new_interval = {timestamp, timestamp, 1};
+            comm_intervals[f].push_back(new_interval);
+        }  
+        // otherwise, set the time of the last interval message to the current timestamp and increase interval packet count by 1
+        else{
+            comm_intervals[f].back().end = timestamp;
+            comm_intervals[f].back().pkts_count++;
+        }
+    }
+    // if there does not exist a communication interval for the specified conversation ...
+    else{
+        // add initial interval for this conversation
+        commInterval initial_interval = {timestamp, timestamp, 1};
+
+        std::vector<commInterval> intervals;
+        intervals.push_back(initial_interval);
+        comm_intervals[f1] = intervals;
+    }
+}
+
+/**
+ * Aggregate the collected information about all communication intervals of every conversation.
+ * Do this by computing the average packet rate per interval and the average time between intervals.
+ * Note: here and within the function, conversation refers to a stateless conversation.
+ */
+void statistics::createCommIntervalStats(){    
+    // iterate over all <conv, conv_intervals> pairs
+    for (auto &cur_elem : comm_intervals) {
+        conv cur_conv = cur_elem.first;
+        std::vector<commInterval> intervals = cur_elem.second;
+
+        // if there is only one interval, the time between intervals cannot be computed and is therefore set to 0
+        if (intervals.size() == 1){
+            entry_commIntervalStat e = {(double) intervals[0].pkts_count, (double) 0};
+            comm_interval_statistics[cur_conv] = e;
+        }
+        // If there is more than one interval, compute the specified averages
+        else if (intervals.size() > 1){
+            long summed_pkts_count = intervals[0].pkts_count;
+            std::chrono::microseconds time_between_ints_sum = (std::chrono::microseconds) 0;
+
+            for (int i = 1; i < intervals.size(); i++) {
+                summed_pkts_count += intervals[i].pkts_count;
+                time_between_ints_sum += intervals[i].start - intervals[i - 1].end;
+            }
+
+            double avg_pkts_count = summed_pkts_count / ((double) intervals.size());
+            double avg_time_betw_ints = (time_between_ints_sum.count() / (double) (intervals.size() - 1)) / (double) 1e6;
+            entry_commIntervalStat e = {avg_pkts_count, avg_time_betw_ints};
+            comm_interval_statistics[cur_conv] = e;
+        }
+    }
+}
+
 /**
  * Increments the packet counter for the given IP address and MSS value.
  * @param ipAddress The IP address whose MSS packet counter should be incremented.
@@ -634,6 +715,7 @@ void statistics::writeToDatabase(std::string database_path) {
         db.writeStatisticsConv(conv_statistics);
         db.writeStatisticsConvStateless(conv_statistics_stateless);
         db.writeStatisticsInterval(interval_statistics);
+        db.writeCommIntervalStats(comm_interval_statistics);
     }
     else {
         // Tinslib failed to recognize the types of the packets in the input PCAP
@@ -641,42 +723,3 @@ void statistics::writeToDatabase(std::string database_path) {
         return;
     }
 }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

+ 48 - 0
code_boost/src/cxx/statistics.h

@@ -15,6 +15,8 @@
 
 using namespace Tins;
 
+#define COMM_INTERVAL_THRESHOLD 10e6  // in microseconds; i.e. here 10s
+
 /*
  * Definition of structs used in unordered_map fields
  */
@@ -263,6 +265,40 @@ struct ipAddress_inOut_port {
     }
 };
 
+/*
+ * Struct used to represent a communication interval (for two hosts):
+ * - Timestamp of the first packet in the interval
+ * - Timestamp of the last packet in the interval
+ * - The count of packets within the interval
+ */
+struct commInterval{
+    std::chrono::microseconds start;
+    std::chrono::microseconds end;
+    long pkts_count;
+
+    bool operator==(const commInterval &other) const {
+        return start == other.start
+               && end == other.end
+               && pkts_count == other.pkts_count;
+    }    
+};
+
+/*
+ * Struct used to represent for the communication intervals of two hosts:
+ * - Average time between intervals
+ * - The average count of packets within an interval
+ */
+struct entry_commIntervalStat{
+    double avg_pkts_count;
+    double avg_time_between;
+
+    bool operator==(const entry_commIntervalStat &other) const {
+        return avg_pkts_count == other.avg_pkts_count
+               && avg_time_between == other.avg_time_between;
+    }    
+};
+
+
 /*
  * Definition of hash functions for structs used as key in unordered_map
  */
@@ -375,6 +411,10 @@ public:
 
     void addConvStatStateless(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
 
+    void addCommInterval(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
+
+    void createCommIntervalStats();
+
     std::vector<float> calculateIPsCumEntropy();
 
     std::vector<float> calculateLastIntervalIPsEntropy(std::chrono::microseconds intervalStartTimestamp);
@@ -470,6 +510,7 @@ private:
     int intervalCumNovelMSSCount = 0;
     int intervalCumNovelPortCount = 0;
 
+
     /*
      * Data containers
      */
@@ -525,6 +566,13 @@ private:
 
     // {IP Address, MAC Address}
     std::unordered_map<std::string, std::string> ip_mac_mapping;
+
+    // {IP Address A, Port A, IP Address B, Port B, listof(commInterval)}
+    // Used to manage all communication intervals for a pair of communicating hosts
+    std::unordered_map<conv, std::vector<commInterval> > comm_intervals;
+
+    // {IP Address A, Port A, IP Address B, Port B, avg #packets, avg time between intervals}
+    std::unordered_map<conv, entry_commIntervalStat> comm_interval_statistics;
 };
 
 

+ 42 - 0
code_boost/src/cxx/statistics_db.cpp

@@ -523,3 +523,45 @@ void statistics_db::writeStatisticsInterval(std::unordered_map<std::string, entr
     }
 }
 
+/**
+ * Writes the communication interval statistics for every conversation into the database.
+ * @param commIntervalStatistics The communication interval statistics from class statistics.
+ */
+void statistics_db::writeCommIntervalStats(std::unordered_map<conv, entry_commIntervalStat> commIntervalStatistics){
+    try {
+        db->exec("DROP TABLE IF EXISTS comm_interval_statistics");
+        SQLite::Transaction transaction(*db);
+        const char *createTable = "CREATE TABLE comm_interval_statistics ("
+                "ipAddressA TEXT,"
+                "portA INTEGER,"
+                "ipAddressB TEXT,"              
+                "portB INTEGER,"
+                "avgPktCount REAL,"
+                "avgTimeBetweenIntervals REAL,"
+                "PRIMARY KEY(ipAddressA,portA,ipAddressB,portB));";
+        db->exec(createTable);
+        SQLite::Statement query(*db, "INSERT INTO comm_interval_statistics VALUES (?, ?, ?, ?, ?, ?)");
+
+        // iterate over every conversation and interval aggregation pair and store the respective values in the database
+        for (auto it = commIntervalStatistics.begin(); it != commIntervalStatistics.end(); ++it) {
+            conv f = it->first;
+            entry_commIntervalStat e = it->second;
+            if (e.avg_pkts_count > 0){
+                query.bind(1, f.ipAddressA);
+                query.bind(2, f.portA);
+                query.bind(3, f.ipAddressB);
+                query.bind(4, f.portB);
+                query.bind(5, e.avg_pkts_count);
+                query.bind(6, e.avg_time_between);
+                
+                query.exec();
+                query.reset();
+            }
+            
+        }
+        transaction.commit();
+    }
+    catch (std::exception &e) {
+        std::cout << "Exception in statistics_db: " << e.what() << std::endl;
+    }
+}

+ 2 - 0
code_boost/src/cxx/statistics_db.h

@@ -47,6 +47,8 @@ public:
 
     void writeStatisticsInterval(std::unordered_map<std::string, entry_intervalStat> intervalStatistics);
 
+    void writeCommIntervalStats(std::unordered_map<conv, entry_commIntervalStat> commIntervalStatistics);
+
 private:
     // Pointer to the SQLite database
     std::unique_ptr<SQLite::Database> db;