Browse Source

fix normalized entropy calculation

Jens Keim 5 years ago
parent
commit
d37342ac44
2 changed files with 15 additions and 52 deletions
  1. 5 2
      code_boost/src/cxx/statistics.cpp
  2. 10 50
      code_boost/src/cxx/statistics_db.cpp

+ 5 - 2
code_boost/src/cxx/statistics.cpp

@@ -204,7 +204,7 @@ void statistics::calculateIPIntervalPacketRate(std::chrono::duration<int, std::m
  * Calculates the entropies for the count of integer values.
  * @param map the current map containing the values with counts
  * @param old the old map containing the values with counts (from the last iteration)
- * @return a vector containing the calculated entropies: entropy of all updated values, entropy of all novel values
+ * @return a vector containing the calculated entropies, in this order: entropy of all updated values, entropy of all novel values, normalized entropy of all updated values, normalized entropy of all novel values (each normalized by log2 of the number of counted values)
  */
 std::vector<double> statistics::calculateEntropies(std::unordered_map<int, int> &map, std::unordered_map<int, int> &old) {
     std::vector<double> counts;
@@ -246,7 +246,10 @@ std::vector<double> statistics::calculateEntropies(std::unordered_map<int, int>
         novel_entropy += -1 * novel_prob * log2(novel_prob);
     }
 
-    return {entropy, novel_entropy};
+    double norm_entropy = entropy / log2(counts.size());
+    double norm_novel_entropy = novel_entropy / log2(novel_counts.size());
+
+    return {entropy, novel_entropy, norm_entropy, norm_novel_entropy};
 }
 
 /**

+ 10 - 50
code_boost/src/cxx/statistics_db.cpp

@@ -719,16 +719,6 @@ void statistics_db::writeStatisticsInterval(const std::unordered_map<std::string
             double ip_dst_novel_entropy = 0.0;
             double ip_src_cum_entropy = 0.0;
             double ip_dst_cum_entropy = 0.0;
-            double ttl_entropy = 0.0;
-            double win_size_entropy = 0.0;
-            double tos_entropy = 0.0;
-            double mss_entropy = 0.0;
-            double port_entropy = 0.0;
-            double ttl_novel_entropy = 0.0;
-            double win_size_novel_entropy = 0.0;
-            double tos_novel_entropy = 0.0;
-            double mss_novel_entropy = 0.0;
-            double port_novel_entropy = 0.0;
             for (auto it = intervalStatistics.begin(); it != intervalStatistics.end(); ++it) {
                 const entry_intervalStat &e = it->second;
                 if (ip_src_entropy < e.ip_src_entropy) {
@@ -749,36 +739,6 @@ void statistics_db::writeStatisticsInterval(const std::unordered_map<std::string
                 if (ip_dst_cum_entropy < e.ip_dst_cum_entropy) {
                     ip_dst_cum_entropy = e.ip_dst_cum_entropy;
                 }
-                if (ttl_entropy < e.ttl_entropies[0]) {
-                    ttl_entropy = e.ttl_entropies[0];
-                }
-                if (win_size_entropy < e.win_size_entropies[0]) {
-                    win_size_entropy = e.win_size_entropies[0];
-                }
-                if (tos_entropy < e.tos_entropies[0]) {
-                    tos_entropy = e.tos_entropies[0];
-                }
-                if (mss_entropy < e.mss_entropies[0]) {
-                    mss_entropy = e.mss_entropies[0];
-                }
-                if (port_entropy < e.port_entropies[0]) {
-                    port_entropy = e.port_entropies[0];
-                }
-                if (ttl_novel_entropy < e.ttl_entropies[1]) {
-                    ttl_novel_entropy = e.ttl_entropies[1];
-                }
-                if (win_size_novel_entropy < e.win_size_entropies[1]) {
-                    win_size_novel_entropy = e.win_size_entropies[1];
-                }
-                if (tos_novel_entropy < e.tos_entropies[1]) {
-                    tos_novel_entropy = e.tos_entropies[1];
-                }
-                if (mss_novel_entropy < e.mss_entropies[1]) {
-                    mss_novel_entropy = e.mss_entropies[1];
-                }
-                if (port_novel_entropy < e.port_entropies[1]) {
-                    port_novel_entropy = e.port_entropies[1];
-                }
             }
 
             SQLite::Statement query(*db, "INSERT INTO " + table_name + " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
@@ -815,16 +775,16 @@ void statistics_db::writeStatisticsInterval(const std::unordered_map<std::string
                 query.bind(28, e.win_size_entropies[1]);
                 query.bind(29, e.tos_entropies[1]);
                 query.bind(30, e.mss_entropies[1]);
-                query.bind(31, e.port_entropies[0]/port_entropy);
-                query.bind(32, e.ttl_entropies[0]/ttl_entropy);
-                query.bind(33, e.win_size_entropies[0]/win_size_entropy);
-                query.bind(34, e.tos_entropies[0]/tos_entropy);
-                query.bind(35, e.mss_entropies[0]/mss_entropy);
-                query.bind(36, e.port_entropies[1]/port_novel_entropy);
-                query.bind(37, e.ttl_entropies[1]/ttl_novel_entropy);
-                query.bind(38, e.win_size_entropies[1]/win_size_novel_entropy);
-                query.bind(39, e.tos_entropies[1]/tos_novel_entropy);
-                query.bind(40, e.mss_entropies[1]/mss_novel_entropy);
+                query.bind(31, e.port_entropies[2]);
+                query.bind(32, e.ttl_entropies[2]);
+                query.bind(33, e.win_size_entropies[2]);
+                query.bind(34, e.tos_entropies[2]);
+                query.bind(35, e.mss_entropies[2]);
+                query.bind(36, e.port_entropies[3]);
+                query.bind(37, e.ttl_entropies[3]);
+                query.bind(38, e.win_size_entropies[3]);
+                query.bind(39, e.tos_entropies[3]);
+                query.bind(40, e.mss_entropies[3]);
                 query.bind(41, e.ip_src_entropy/ip_src_entropy);
                 query.bind(42, e.ip_dst_entropy/ip_dst_entropy);
                 query.bind(43, e.ip_src_cum_entropy/ip_src_cum_entropy);