Browse Source

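Fix normalized entropy calculation for IP addresses: divide each interval's entropy by log2 of that interval's IP count instead of by the maximum entropy over all intervals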

Jens Keim 5 years ago
parent
commit
e6a6628a2c

+ 11 - 2
code_boost/src/cxx/statistics.cpp

@@ -133,11 +133,16 @@ std::vector<double> statistics::calculateLastIntervalIPsEntropy(std::chrono::mic
         this->ip_src_novel_count = IPsSrcNovelPktsCounts.size();
         this->ip_dst_novel_count = IPsDstNovelPktsCounts.size();
 
-        std::vector<double> entropies = {IPsSrcEntropy, IPsDstEntropy, IPsSrcNovelEntropy, IPsDstNovelEntropy};
+        double norm_src_entropy = IPsSrcEntropy / log2(IPsSrcPktsCounts.size());
+        double norm_dst_entropy = IPsDstEntropy / log2(IPsDstPktsCounts.size());
+        double norm_novel_src_entropy = IPsSrcNovelEntropy / log2(IPsSrcNovelPktsCounts.size());
+        double norm_novel_dst_entropy = IPsDstNovelEntropy / log2(IPsDstNovelPktsCounts.size());
+
+        std::vector<double> entropies = {IPsSrcEntropy, IPsDstEntropy, IPsSrcNovelEntropy, IPsDstNovelEntropy, norm_src_entropy, norm_dst_entropy, norm_novel_src_entropy, norm_novel_dst_entropy};
         return entropies;
     }
     else {
-        return {-1, -1, -1, -1};
+        return {-1, -1, -1, -1, -1, -1, -1, -1};
     }
 }
 
@@ -315,6 +320,10 @@ void statistics::addIntervalStat(std::chrono::duration<int, std::micro> interval
         interval_statistics[lastPktTimestamp_s].ip_dst_entropy = ipEntopies[1];
         interval_statistics[lastPktTimestamp_s].ip_src_novel_entropy = ipEntopies[2];
         interval_statistics[lastPktTimestamp_s].ip_dst_novel_entropy = ipEntopies[3];
+        interval_statistics[lastPktTimestamp_s].ip_src_entropy_norm = ipEntopies[4];
+        interval_statistics[lastPktTimestamp_s].ip_dst_entropy_norm = ipEntopies[5];
+        interval_statistics[lastPktTimestamp_s].ip_src_novel_entropy_norm = ipEntopies[6];
+        interval_statistics[lastPktTimestamp_s].ip_dst_novel_entropy_norm = ipEntopies[7];
     }
     if(ipCumEntopies.size()>1){
         interval_statistics[lastPktTimestamp_s].ip_src_cum_entropy = ipCumEntopies[0];

+ 8 - 0
code_boost/src/cxx/statistics.h

@@ -246,6 +246,10 @@ struct entry_intervalStat {
     double ip_dst_entropy;
     double ip_src_novel_entropy;
     double ip_dst_novel_entropy;
+    double ip_src_entropy_norm;
+    double ip_dst_entropy_norm;
+    double ip_src_novel_entropy_norm;
+    double ip_dst_novel_entropy_norm;
     double ip_src_cum_entropy;
     double ip_dst_cum_entropy;
     std::vector<double> ttl_entropies;
@@ -276,6 +280,10 @@ struct entry_intervalStat {
                && ip_dst_entropy == other.ip_dst_entropy
                && ip_src_novel_entropy == other.ip_src_novel_entropy
                && ip_dst_novel_entropy == other.ip_dst_novel_entropy
+               && ip_src_entropy_norm == other.ip_src_entropy_norm
+               && ip_dst_entropy_norm == other.ip_dst_entropy_norm
+               && ip_src_novel_entropy_norm == other.ip_src_novel_entropy_norm
+               && ip_dst_novel_entropy_norm == other.ip_dst_novel_entropy_norm
                && ip_src_cum_entropy == other.ip_src_cum_entropy
                && ip_dst_cum_entropy == other.ip_dst_cum_entropy
                && payload_count == other.payload_count

+ 4 - 20
code_boost/src/cxx/statistics_db.cpp

@@ -713,26 +713,10 @@ void statistics_db::writeStatisticsInterval(const std::unordered_map<std::string
                     "ip_dst_novel_entropy_normalized REAL,"
                     "PRIMARY KEY(last_pkt_timestamp));");
 
-            double ip_src_entropy = 0.0;
-            double ip_dst_entropy = 0.0;
-            double ip_src_novel_entropy = 0.0;
-            double ip_dst_novel_entropy = 0.0;
             double ip_src_cum_entropy = 0.0;
             double ip_dst_cum_entropy = 0.0;
             for (auto it = intervalStatistics.begin(); it != intervalStatistics.end(); ++it) {
                 const entry_intervalStat &e = it->second;
-                if (ip_src_entropy < e.ip_src_entropy) {
-                    ip_src_entropy = e.ip_src_entropy;
-                }
-                if (ip_dst_entropy < e.ip_dst_entropy) {
-                    ip_dst_entropy = e.ip_dst_entropy;
-                }
-                if (ip_src_novel_entropy < e.ip_src_novel_entropy) {
-                    ip_src_novel_entropy = e.ip_src_novel_entropy;
-                }
-                if (ip_dst_novel_entropy < e.ip_dst_novel_entropy) {
-                    ip_dst_novel_entropy = e.ip_dst_novel_entropy;
-                }
                 if (ip_src_cum_entropy < e.ip_src_cum_entropy) {
                     ip_src_cum_entropy = e.ip_src_cum_entropy;
                 }
@@ -785,14 +769,14 @@ void statistics_db::writeStatisticsInterval(const std::unordered_map<std::string
                 query.bind(38, e.win_size_entropies[3]);
                 query.bind(39, e.tos_entropies[3]);
                 query.bind(40, e.mss_entropies[3]);
-                query.bind(41, e.ip_src_entropy/ip_src_entropy);
-                query.bind(42, e.ip_dst_entropy/ip_dst_entropy);
+                query.bind(41, e.ip_src_entropy_norm);
+                query.bind(42, e.ip_dst_entropy_norm);
                 query.bind(43, e.ip_src_cum_entropy/ip_src_cum_entropy);
                 query.bind(44, e.ip_dst_cum_entropy/ip_dst_cum_entropy);
                 query.bind(45, e.ip_src_novel_entropy);
                 query.bind(46, e.ip_dst_novel_entropy);
-                query.bind(47, e.ip_src_novel_entropy/ip_src_novel_entropy);
-                query.bind(48, e.ip_dst_novel_entropy/ip_dst_novel_entropy);
+                query.bind(47, e.ip_src_novel_entropy_norm);
+                query.bind(48, e.ip_dst_novel_entropy_norm);
                 query.exec();
                 query.reset();