Browse Source

add entropies for ttl, winsize, tos, mss and ports to interval stats

add entropies for their novel values as well
Jens Keim 5 years ago
parent
commit
8c65d13016

+ 67 - 9
code_boost/src/cxx/statistics.cpp

@@ -108,7 +108,6 @@ std::vector<float> statistics::calculateLastIntervalIPsEntropy(std::chrono::micr
     }
 }
 
-
 /**
  * Calculates the cumulative entropy of the source and destination IPs, i.e., the entropy for packets from the beginning of the pcap file.
  * @return a vector: contains the cumulative entropies of source and destination IPs
@@ -167,6 +166,55 @@ void statistics::calculateIPIntervalPacketRate(std::chrono::duration<int, std::m
         }
 }
 
+/**
+ * Calculates the base-2 Shannon entropies of the value counts gained since the last interval.
+ * @param map current cumulative map of values to their counts
+ * @param old cumulative map of values to their counts from the last iteration (only read)
+ * @return a vector of two entropies: [0] over the gains of all novel or increased values,
+ *         [1] over the counts of novel values only (keys absent from old); 0.0 when there are none
+ */
+std::vector<double> statistics::calculateEntropies(std::unordered_map<int, int> &map, std::unordered_map<int, int> &old) {
+    std::vector<double> counts;
+    double count_total = 0.0;
+
+    std::vector<double> novel_counts;
+    double novel_count_total = 0.0;
+
+    // collect the count gained by every value since the last interval
+    for (const auto &entry: map) {
+        const auto previous = old.find(entry.first);
+        const bool novel = previous == old.end();
+        // find() instead of operator[] so that old is never modified by the lookup
+        const double gain = static_cast<double>(novel ? entry.second : entry.second - previous->second);
+        if (gain <= 0.0) {
+            // unchanged values carry no information; non-positive gains would make log2 yield NaN
+            continue;
+        }
+        // every known value with a gain is counted; the former check compared old.count(key) with
+        // map.count(key), which are both 1 for a known key, so increased values were always skipped
+        counts.push_back(gain);
+        count_total += gain;
+        if (novel) {
+            novel_counts.push_back(gain);
+            novel_count_total += gain;
+        }
+    }
+
+    // Shannon entropy of a list of positive counts; an empty list yields 0.0
+    const auto shannon_entropy = [](const std::vector<double> &values, double total) {
+        double result = 0.0;
+        for (const double value: values) {
+            const double prob = value / total;
+            result -= prob * log2(prob);
+        }
+        return result;
+    };
+
+    const double entropy = shannon_entropy(counts, count_total);
+    const double novel_entropy = shannon_entropy(novel_counts, novel_count_total);
+    return {entropy, novel_entropy};
+}
+
 /**
  * Registers statistical data for a time interval.
  * @param intervalStartTimestamp The timestamp where the interval starts.
@@ -194,25 +242,35 @@ void statistics::addIntervalStat(std::chrono::duration<int, std::micro> interval
     interval_statistics[lastPktTimestamp_s].payload_count = payloadCount - intervalPayloadCount;
     interval_statistics[lastPktTimestamp_s].incorrect_tcp_checksum_count = incorrectTCPChecksumCount - intervalIncorrectTCPChecksumCount;
     interval_statistics[lastPktTimestamp_s].correct_tcp_checksum_count = correctTCPChecksumCount - intervalCorrectTCPChecksumCount;
-    interval_statistics[lastPktTimestamp_s].novel_ip_count = ip_statistics.size() - intervalCumNovelIPCount;
-    interval_statistics[lastPktTimestamp_s].novel_ttl_count = ttl_values.size() - intervalCumNovelTTLCount;
-    interval_statistics[lastPktTimestamp_s].novel_win_size_count = win_values.size() - intervalCumNovelWinSizeCount;
-    interval_statistics[lastPktTimestamp_s].novel_tos_count = tos_values.size() - intervalCumNovelToSCount;
-    interval_statistics[lastPktTimestamp_s].novel_mss_count = mss_values.size() - intervalCumNovelMSSCount;
-    interval_statistics[lastPktTimestamp_s].novel_port_count = port_values.size() - intervalCumNovelPortCount;
-
+    interval_statistics[lastPktTimestamp_s].novel_ip_count = static_cast<int>(ip_statistics.size()) - intervalCumNovelIPCount;
+    interval_statistics[lastPktTimestamp_s].novel_ttl_count = static_cast<int>(ttl_values.size()) - intervalCumNovelTTLCount;
+    interval_statistics[lastPktTimestamp_s].novel_win_size_count = static_cast<int>(win_values.size()) - intervalCumNovelWinSizeCount;
+    interval_statistics[lastPktTimestamp_s].novel_tos_count = static_cast<int>(tos_values.size()) - intervalCumNovelToSCount;
+    interval_statistics[lastPktTimestamp_s].novel_mss_count = static_cast<int>(mss_values.size()) - intervalCumNovelMSSCount;
+    interval_statistics[lastPktTimestamp_s].novel_port_count = static_cast<int>(port_values.size()) - intervalCumNovelPortCount;
+
+    interval_statistics[lastPktTimestamp_s].ttl_entropies = calculateEntropies(ttl_values, intervalCumTTLValues);
+    interval_statistics[lastPktTimestamp_s].win_size_entropies = calculateEntropies(win_values, intervalCumWinSizeValues);
+    interval_statistics[lastPktTimestamp_s].tos_entropies = calculateEntropies(tos_values, intervalCumTosValues);
+    interval_statistics[lastPktTimestamp_s].mss_entropies = calculateEntropies(mss_values, intervalCumMSSValues);
+    interval_statistics[lastPktTimestamp_s].port_entropies = calculateEntropies(port_values, intervalCumPortValues);
 
     intervalPayloadCount = payloadCount;
     intervalIncorrectTCPChecksumCount = incorrectTCPChecksumCount;
     intervalCorrectTCPChecksumCount = correctTCPChecksumCount;
     intervalCumPktCount = packetCount;
     intervalCumSumPktSize = sumPacketSize;
-    intervalCumNovelIPCount =  ip_statistics.size();
+    intervalCumNovelIPCount = ip_statistics.size();
     intervalCumNovelTTLCount = ttl_values.size();
     intervalCumNovelWinSizeCount = win_values.size();
     intervalCumNovelToSCount = tos_values.size();
     intervalCumNovelMSSCount = mss_values.size();
     intervalCumNovelPortCount = port_values.size();
+    intervalCumTTLValues = ttl_values;
+    intervalCumWinSizeValues = win_values;
+    intervalCumTosValues = tos_values;
+    intervalCumMSSValues = mss_values;
+    intervalCumPortValues = port_values;
 
     if(ipEntopies.size()>1){
         interval_statistics[lastPktTimestamp_s].ip_src_entropy = ipEntopies[0];

+ 15 - 2
code_boost/src/cxx/statistics.h

@@ -242,10 +242,15 @@ struct entry_intervalStat {
     float pkt_rate;
     float kbytes;
     float kbyte_rate;
-    float ip_src_entropy; 
+    float ip_src_entropy;
     float ip_dst_entropy;
-    float ip_src_cum_entropy; 
+    float ip_src_cum_entropy;
     float ip_dst_cum_entropy;
+    std::vector<double> ttl_entropies;
+    std::vector<double> win_size_entropies;
+    std::vector<double> tos_entropies;
+    std::vector<double> mss_entropies;
+    std::vector<double> port_entropies;
     int payload_count;
     int incorrect_tcp_checksum_count;
     int correct_tcp_checksum_count;
@@ -256,6 +261,7 @@ struct entry_intervalStat {
     int novel_mss_count;
     int novel_port_count;
 
+    // FIXME: add new attributes to operator==
     bool operator==(const entry_intervalStat &other) const {
         return start == other.start
                && end == other.end
@@ -552,6 +558,8 @@ public:
 
     std::vector<float> calculateLastIntervalIPsEntropy(std::chrono::microseconds intervalStartTimestamp);
 
+    std::vector<double> calculateEntropies(std::unordered_map<int, int> &map, std::unordered_map<int, int> &old);
+
     void addIntervalStat(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp, std::chrono::microseconds lastPktTimestamp);
 
     void checkPayload(const PDU *pdu_l4);
@@ -659,6 +667,11 @@ private:
     int intervalCumNovelToSCount = 0;
     int intervalCumNovelMSSCount = 0;
     int intervalCumNovelPortCount = 0;
+    std::unordered_map<int,int> intervalCumTTLValues;
+    std::unordered_map<int,int> intervalCumWinSizeValues;
+    std::unordered_map<int,int> intervalCumTosValues;
+    std::unordered_map<int,int> intervalCumMSSValues;
+    std::unordered_map<int,int> intervalCumPortValues;
 
     int default_interval = 0;
 

+ 21 - 1
code_boost/src/cxx/statistics_db.cpp

@@ -683,9 +683,19 @@ void statistics_db::writeStatisticsInterval(const std::unordered_map<std::string
                     "newWinSizeCount INTEGER,"
                     "newToSCount INTEGER,"
                     "newMSSCount INTEGER,"
+                    "PortEntropy REAL,"
+                    "TTLEntropy REAL,"
+                    "WinSizeEntropy REAL,"
+                    "ToSEntropy REAL,"
+                    "MSSEntropy REAL,"
+                    "newPortEntropy REAL,"
+                    "newTTLEntropy REAL,"
+                    "newWinSizeEntropy REAL,"
+                    "newToSEntropy REAL,"
+                    "newMSSEntropy REAL,"
                     "PRIMARY KEY(lastPktTimestamp));");
 
-            SQLite::Statement query(*db, "INSERT INTO " + table_name + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
+            SQLite::Statement query(*db, "INSERT INTO " + table_name + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
             for (auto it = intervalStatistics.begin(); it != intervalStatistics.end(); ++it) {
                 const entry_intervalStat &e = it->second;
 
@@ -709,6 +719,16 @@ void statistics_db::writeStatisticsInterval(const std::unordered_map<std::string
                 query.bind(18, e.novel_win_size_count);
                 query.bind(19, e.novel_tos_count);
                 query.bind(20, e.novel_mss_count);
+                query.bind(21, e.port_entropies[0]);
+                query.bind(22, e.ttl_entropies[0]);
+                query.bind(23, e.win_size_entropies[0]);
+                query.bind(24, e.tos_entropies[0]);
+                query.bind(25, e.mss_entropies[0]);
+                query.bind(26, e.port_entropies[1]);
+                query.bind(27, e.ttl_entropies[1]);
+                query.bind(28, e.win_size_entropies[1]);
+                query.bind(29, e.tos_entropies[1]);
+                query.bind(30, e.mss_entropies[1]);
                 query.exec();
                 query.reset();
 

+ 1 - 1
code_boost/src/cxx/statistics_db.h

@@ -25,7 +25,7 @@ public:
     /*
      * Database version: Increment number on every change in the C++ code!
      */
-    static const int DB_VERSION = 16;
+    static const int DB_VERSION = 17;
 
     /*
      * Methods to read from database