Просмотр исходного кода

merged statistics.h/cpp of both groups

Marcel 6 лет назад
Родитель
Сommit
ba8ee9ced9
2 измененных файлов с 229 добавлено и 3 удалено
  1. 115 1
      code_boost/src/cxx/statistics.cpp
  2. 114 2
      code_boost/src/cxx/statistics.h

+ 115 - 1
code_boost/src/cxx/statistics.cpp

@@ -247,6 +247,101 @@ void statistics::addConvStat(std::string ipAddressSender,int sport,std::string i
     }
 }
 
+/**
+ * Registers statistical data for a sent packet in a given extended conversation (two IPs, two ports, protocol).
+ * Both directions of a conversation share a single entry: the key (receiver -> sender) is looked up first,
+ * then (sender -> receiver); a previously unseen conversation is stored under (sender -> receiver).
+ * For an already known conversation the function
+ *  - increments pkts_count,
+ *  - records the inter-arrival time to the previous packet while pkts_count is at most 3,
+ *  - appends the timestamp to pkts_timestamp,
+ *  - extends the most recent communication interval, or opens a new one if the packet arrives more than
+ *    COMM_INTERVAL_THRESHOLD microseconds after the end of that interval.
+ * For a new conversation an entry with one interval, one packet and one timestamp is created; all remaining
+ * fields of the entry are zero-initialized.
+ * @param ipAddressSender The sender IP address.
+ * @param sport The source port.
+ * @param ipAddressReceiver The receiver IP address.
+ * @param dport The destination port.
+ * @param protocol The used protocol.
+ * @param timestamp The timestamp of the packet.
+ */
+void statistics::addConvStatExt(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport,std::string protocol, std::chrono::microseconds timestamp){
+    const convWithProt f1 = {ipAddressReceiver, dport, ipAddressSender, sport, protocol};
+    const convWithProt f2 = {ipAddressSender, sport, ipAddressReceiver, dport, protocol};
+
+    // find out which direction of the conversation (if any) is contained in conv_statistics_extended;
+    // one lookup per direction instead of repeated count()/operator[] calls
+    auto it = conv_statistics_extended.find(f1);
+    if (it == conv_statistics_extended.end())
+        it = conv_statistics_extended.find(f2);
+
+    // if there does not exist an entry for the specified conversation
+    if (it == conv_statistics_extended.end()) {
+        // add initial interval entry for this conversation
+        commInterval initial_interval = {timestamp, timestamp, 1};
+
+        // value-initialize so that the averages, which are only computed later, never hold indeterminate values
+        entry_convStatExt entry{};
+        entry.comm_intervals.push_back(initial_interval);
+        entry.pkts_count = 1;
+        entry.pkts_timestamp.push_back(timestamp);
+        conv_statistics_extended[f2] = entry;
+        return;
+    }
+
+    entry_convStatExt &entry = it->second;
+
+    // increase pkts count and check on delay
+    entry.pkts_count++;
+    if (!entry.pkts_timestamp.empty() && entry.pkts_count <= 3)
+        entry.interarrival_time.push_back(std::chrono::duration_cast<std::chrono::microseconds>(timestamp - entry.pkts_timestamp.back()));
+    entry.pkts_timestamp.push_back(timestamp);
+
+    // COMM_INTERVAL_THRESHOLD is a floating-point literal, hence the conversion to an integral tick count
+    const std::chrono::microseconds threshold(static_cast<long long>(COMM_INTERVAL_THRESHOLD));
+    commInterval &last_interval = entry.comm_intervals.back();
+
+    // if the time difference has exceeded the threshold, create a new interval with this message
+    if (timestamp - last_interval.end > threshold) {  // > or >= ?
+        commInterval new_interval = {timestamp, timestamp, 1};
+        // last_interval is invalidated by the push_back and must not be used afterwards
+        entry.comm_intervals.push_back(new_interval);
+    }
+    // otherwise, set the time of the last interval message to the current timestamp and increase interval packet count by 1
+    else {
+        last_interval.end = timestamp;
+        last_interval.pkts_count++;
+    }
+}
+
+/**
+ * Derives the per-conversation summary values from the communication intervals stored in conv_statistics_extended.
+ * For every conversation the following fields are written (all durations in seconds):
+ *  - avg_int_pkts_count: mean number of packets per interval
+ *  - avg_time_between_ints: mean gap between the end of one interval and the start of the next (0 for a single interval)
+ *  - avg_interval_time: mean interval duration
+ *  - total_comm_duration: time between the first and the last recorded packet timestamp
+ */
+void statistics::createCommIntervalStats(){
+    // visit every <convWithProt, entry_convStatExt> pair
+    for (auto &kv : conv_statistics_extended) {
+        entry_convStatExt &stats = kv.second;
+        const std::vector<commInterval> &ivals = stats.comm_intervals;
+        const std::size_t n = ivals.size();
+
+        if (n == 1) {
+            // a single interval has no neighbour, so the time between intervals is defined as 0
+            const commInterval &only = ivals[0];
+            stats.avg_int_pkts_count = static_cast<double>(only.pkts_count);
+            stats.avg_time_between_ints = 0.0;
+            stats.avg_interval_time = static_cast<double>((only.end - only.start).count()) / 1e6;
+        }
+        else if (n > 1) {
+            // accumulate packet counts, interval durations and the gaps between consecutive intervals
+            long pkts_total = 0;
+            std::chrono::microseconds duration_total(0);
+            std::chrono::microseconds gap_total(0);
+
+            for (std::size_t idx = 0; idx < n; ++idx) {
+                pkts_total += ivals[idx].pkts_count;
+                duration_total += ivals[idx].end - ivals[idx].start;
+                if (idx > 0)
+                    gap_total += ivals[idx].start - ivals[idx - 1].end;
+            }
+
+            const double interval_count = static_cast<double>(n);
+            const double gap_count = static_cast<double>(n - 1);
+            stats.avg_int_pkts_count = pkts_total / interval_count;
+            stats.avg_time_between_ints = (gap_total.count() / gap_count) / 1e6;
+            stats.avg_interval_time = (duration_total.count() / interval_count) / 1e6;
+        }
+
+        // overall duration: last packet timestamp minus first packet timestamp, converted from microseconds
+        const std::chrono::microseconds span = stats.pkts_timestamp.back() - stats.pkts_timestamp.front();
+        stats.total_comm_duration = static_cast<double>(span.count()) / 1e6;
+    }
+}
+
 /**
  * Increments the packet counter for the given IP address and MSS value.
  * @param ipAddress The IP address whose MSS packet counter should be incremented.
@@ -300,7 +395,6 @@ void statistics::incrementProtocolCount(std::string ipAddress, std::string proto
  * Returns the number of packets seen for the given IP address and protocol.
  * @param ipAddress The IP address whose packet count is wanted.
  * @param protocol The protocol whose packet count is wanted.
- * @return an integer: The number of packets
  */
 int statistics::getProtocolCount(std::string ipAddress, std::string protocol) {
     return protocol_distribution[{ipAddress, protocol}].count;
@@ -418,6 +512,24 @@ void statistics::addIpStat_packetSent(std::string filePath, std::string ipAddres
     ip_statistics[ipAddressReceiver].kbytes_received += (float(bytesSent) / 1024);
     ip_statistics[ipAddressReceiver].pkts_received++;
     ip_statistics[ipAddressReceiver].pkts_received_timestamp.push_back(timestamp);
+
+    // Increment Degrees for sender and receiver, if Sender sends its first packet to this receiver
+    std::vector<std::string>::iterator found_receiver = std::find(contacted_ips[ipAddressSender].begin(), contacted_ips[ipAddressSender].end(), ipAddressReceiver);
+    if(found_receiver == contacted_ips[ipAddressSender].end()){
+        // Receiver is NOT contained in the List of IPs, that the Sender has contacted, therefore this is the first packet in this direction
+        ip_statistics[ipAddressSender].out_degree++;
+        ip_statistics[ipAddressReceiver].in_degree++;
+
+        // Increment overall_degree only if this is the first packet for the connection (both directions)
+        // Therefore check, whether Receiver has contacted Sender before
+        std::vector<std::string>::iterator sender_contacted = std::find(contacted_ips[ipAddressReceiver].begin(), contacted_ips[ipAddressReceiver].end(), ipAddressSender);
+        if(sender_contacted == contacted_ips[ipAddressReceiver].end()){
+            ip_statistics[ipAddressSender].overall_degree++;
+            ip_statistics[ipAddressReceiver].overall_degree++;
+        }  
+
+        contacted_ips[ipAddressSender].push_back(ipAddressReceiver);
+    }
 }
 
 /**
@@ -644,12 +756,14 @@ void statistics::writeToDatabase(std::string database_path) {
         db.writeStatisticsIP(ip_statistics);
         db.writeStatisticsTTL(ttl_distribution);
         db.writeStatisticsIpMac(ip_mac_mapping);
+        db.writeStatisticsDegree(ip_statistics);
         db.writeStatisticsPorts(ip_ports);
         db.writeStatisticsProtocols(protocol_distribution);
         db.writeStatisticsMSS(mss_distribution);
         db.writeStatisticsToS(tos_distribution);
         db.writeStatisticsWin(win_distribution);
         db.writeStatisticsConv(conv_statistics);
+        db.writeStatisticsConvExt(conv_statistics_extended);
         db.writeStatisticsInterval(interval_statistics);
         db.writeDbVersion();
         db.writeStatisticsUnrecognizedPDUs(unrecognized_PDUs);

+ 114 - 2
code_boost/src/cxx/statistics.h

@@ -15,6 +15,8 @@
 
 using namespace Tins;
 
+#define COMM_INTERVAL_THRESHOLD 10e6  // in microseconds (10e6 us = 10 s); a packet arriving more than this after the end of the last communication interval starts a new interval
+
 /*
  * Definition of structs used in unordered_map fields
  */
@@ -60,6 +62,30 @@ struct conv{
     }    
 };
 
+/*
+ * Key type identifying a conversation together with its protocol:
+ * - IP address A
+ * - Port A
+ * - IP address B
+ * - Port B
+ * - Protocol
+ * Two keys are equal only if all five fields match; swapping the A and B side yields a different key.
+ */
+struct convWithProt{
+    std::string ipAddressA;
+    int portA;
+    std::string ipAddressB;
+    int portB;
+    std::string protocol;
+
+    bool operator==(const convWithProt &other) const {
+        // compare the cheap integer fields first, then the strings
+        if (portA != other.portA || portB != other.portB)
+            return false;
+        if (ipAddressA != other.ipAddressA || ipAddressB != other.ipAddressB)
+            return false;
+        return protocol == other.protocol;
+    }
+};
+
 /*
  * Struct used to represent:
  * - IP address (IPv4 or IPv6)
@@ -148,6 +174,9 @@ struct entry_ipStat {
     float kbytes_received;
     float kbytes_sent;
     std::string ip_class;
+    int in_degree;
+    int out_degree;
+    int overall_degree;
     // Collects statstics over time interval
     std::vector<float> interval_pkt_rate;
     float max_interval_pkt_rate;
@@ -168,7 +197,6 @@ struct entry_ipStat {
                && pkts_received_timestamp == other.pkts_received_timestamp;
     }
 };
-
 /*
  * Struct used to represent:
  * - Number of transmitted packets
@@ -285,6 +313,63 @@ struct ipAddress_inOut_port {
     }
 };
 
+/*
+ * A communication interval between two hosts:
+ * - start: timestamp of the first packet in the interval
+ * - end: timestamp of the last packet in the interval
+ * - pkts_count: number of packets within the interval
+ */
+struct commInterval{
+    std::chrono::microseconds start;
+    std::chrono::microseconds end;
+    long pkts_count;
+
+    bool operator==(const commInterval &other) const {
+        // intervals are equal when they cover the same time span and hold the same number of packets
+        if (pkts_count != other.pkts_count)
+            return false;
+        return start == other.start && end == other.end;
+    }
+};
+
+/*
+ * Struct used to represent conversation statistics:
+ * - communication intervals
+ * - # packets
+ * - Average packet rate
+ * - average # packets per communication interval
+ * - Average time between intervals
+ * - Average duration of a communication interval
+ * - Overall communication duration
+ * - Timestamps of packets
+ * - Inter-arrival time
+ * - Average inter-arrival time
+ */
+struct entry_convStatExt {
+    std::vector<commInterval> comm_intervals;
+    long pkts_count;
+    float avg_pkt_rate;
+    double avg_int_pkts_count;
+    double avg_time_between_ints;
+    double avg_interval_time;
+    double total_comm_duration;
+    std::vector<std::chrono::microseconds> pkts_timestamp;
+    std::vector<std::chrono::microseconds> interarrival_time;
+    std::chrono::microseconds avg_interarrival_time;
+
+    // Member-wise equality; every field is compared against the corresponding field of `other`.
+    bool operator==(const entry_convStatExt &other) const {
+        return comm_intervals == other.comm_intervals
+               && pkts_count == other.pkts_count
+               && avg_pkt_rate == other.avg_pkt_rate  // previously compared the field with itself
+               && avg_int_pkts_count == other.avg_int_pkts_count
+               && avg_time_between_ints == other.avg_time_between_ints
+               && avg_interval_time == other.avg_interval_time
+               && total_comm_duration == other.total_comm_duration
+               && pkts_timestamp == other.pkts_timestamp
+               && interarrival_time == other.interarrival_time
+               && avg_interarrival_time == other.avg_interarrival_time;
+    }
+};
+
 /*
  * Struct used to represent:
  * - Source MAC address
@@ -313,7 +398,6 @@ struct unrecognized_PDU_stat {
     std::string timestamp_last_occurrence;
 };
 
-
 /*
  * Definition of hash functions for structs used as key in unordered_map
  */
@@ -374,6 +458,20 @@ namespace std {
                      ^ (hash<int>()(k.portB) << 1)) >> 1);
         }
     };
+
+    // Hash for convWithProt keys: each (IP address, port) side is hashed separately and the two
+    // side hashes are XOR-combined with the hash of the protocol string.
+    template<>
+    struct hash<convWithProt> {
+        std::size_t operator()(const convWithProt &c) const {
+            const std::size_t side_a = (std::hash<std::string>()(c.ipAddressA)
+                                        ^ (std::hash<int>()(c.portA) << 1)) >> 1;
+            const std::size_t side_b = (std::hash<std::string>()(c.ipAddressB)
+                                        ^ (std::hash<int>()(c.portB) << 1)) >> 1;
+            const std::size_t proto = std::hash<std::string>()(c.protocol);
+            return side_a ^ side_b ^ proto;
+        }
+    };
     
     template<>
     struct hash<ipAddress_protocol> {
@@ -436,6 +534,10 @@ public:
 
     void addConvStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
 
+    void addConvStatExt(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport,std::string protocol, std::chrono::microseconds timestamp);
+
+    void createCommIntervalStats();
+
     std::vector<float> calculateIPsCumEntropy();
 
     std::vector<float> calculateLastIntervalIPsEntropy(std::chrono::microseconds intervalStartTimestamp);
@@ -541,6 +643,7 @@ private:
     int intervalCumNovelMSSCount = 0;
     int intervalCumNovelPortCount = 0;
 
+
     /*
      * Data containers
      */
@@ -560,6 +663,11 @@ private:
     // average of inter-arrival times}
     std::unordered_map<conv, entry_convStat> conv_statistics;
 
+    // {IP Address A, Port A, IP Address B, Port B, Protocol,   comm_intervals, #packets, avg. pkt rate, avg. #packets per interval,
+    // avg. time between intervals, avg. interval time, duration, packets timestamps, inter-arrival times, average of inter-arrival times}
+    // Also stores conversation with only one exchanged message. In this case avgPktRate, minDelay, maxDelay and avgDelay are -1
+    std::unordered_map<convWithProt, entry_convStatExt> conv_statistics_extended;
+
     // {Last timestamp in the interval, #packets, #bytes, source IP entropy, destination IP entropy,
     // source IP cumulative entropy, destination IP cumulative entropy, #payload, #incorrect TCP checksum,
     // #correct TCP checksum, #novel IP, #novel TTL, #novel Window Size, #novel ToS,#novel MSS}
@@ -580,6 +688,10 @@ private:
     // {Port, count}
     std::unordered_map<int, int> port_values;
 
+
+    //{IP Address, contacted IP Addresses}
+    std::unordered_map<std::string, std::vector<std::string>> contacted_ips;
+
     // {IP Address, Protocol,  #count, #Data transmitted in bytes}
     std::unordered_map<ipAddress_protocol, entry_protocolStat> protocol_distribution;