Procházet zdrojové kódy

add max/min packet rate to ip_statistics table

aidmar.wainakh před 6 roky
rodič
revize
7f6ebed361

binární
code_boost/src/cxx/.statistics_db.cpp.kate-swp


+ 3 - 3
code_boost/src/cxx/pcap_processor.cpp

@@ -116,7 +116,7 @@ void pcap_processor::collect_statistics() {
         // Aidmar
         int counter=0;
         int timeIntervalNum = 1;
-        std::chrono::duration<int, std::micro> timeInterval(10000000); // 10 sec
+        std::chrono::duration<int, std::micro> timeInterval(10000000); // 10 sec (10,000,000 microseconds)
         std::chrono::microseconds intervalStartTimestamp = stats.getTimestampFirstPacket();
         std::chrono::microseconds firstTimestamp = stats.getTimestampFirstPacket();
         int pktsInterval = 1000;
@@ -136,9 +136,9 @@ void pcap_processor::collect_statistics() {
             std::chrono::microseconds currentCaptureDuration = lastPktTimestamp - firstTimestamp;
             std::chrono::microseconds barrier =  timeIntervalNum*timeInterval;
             if(currentCaptureDuration>barrier){
-                //std::cout<<"LastpkstTimstamp:" << lastPktTimestamp.count() << ", currentCaptureDuration:"<< currentCaptureDuration.count() << ", barrier:" <<barrier.count()<<", interval:" << timeIntervalNum << ", interval time:"<<timeInterval.count()<<"\n";
-                
+                //std::cout<<"LastpkstTimstamp:" << lastPktTimestamp.count() << ", currentCaptureDuration:"<< currentCaptureDuration.count() << ", barrier:" <<barrier.count()<<", interval:" << timeIntervalNum << ", interval time:"<<timeInterval.count()<<"\n";                
                 stats.calculateLastIntervalIPsEntropy(filePath, intervalStartTimestamp);
+                stats.calculateLastIntervalPacketRate(timeInterval, intervalStartTimestamp);
                 timeIntervalNum++;   
                 intervalStartTimestamp = lastPktTimestamp;
             }

+ 50 - 9
code_boost/src/cxx/statistics.cpp

@@ -97,10 +97,11 @@ int getClosestIndex(std::vector<std::chrono::microseconds> v, std::chrono::micro
 
 // Aidmar
 /**
- * Calculate entropy of source and destination IPs for last time interval and write results to a ip_entropy_interval.csv file.
+ * Calculate entropy of source and destination IPs for last time interval.
+ * The results are written to ip_entropy_interval.csv file.
+ * @param filePath The input (background) PCAP file path.
  * @param intervalStartTimestamp The timestamp where the interval starts.
  */
-
 void statistics::calculateLastIntervalIPsEntropy(std::string filePath, std::chrono::microseconds intervalStartTimestamp){
         std::vector <int> IPsSrcPktsCounts; 
         std::vector <int> IPsDstPktsCounts; 
@@ -111,6 +112,7 @@ void statistics::calculateLastIntervalIPsEntropy(std::string filePath, std::chro
         int pktsSent = 0, pktsReceived = 0;
         
         for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
+            // TO-DO: should add this condition to avoid Segmentation Fault    if(i->second.pktsSentTimestamp.size()>0)
             int indexStartSent = getClosestIndex(i->second.pktsSentTimestamp, intervalStartTimestamp);                         
             int IPsSrcPktsCount = i->second.pktsSentTimestamp.size() - indexStartSent;
             IPsSrcPktsCounts.push_back(IPsSrcPktsCount);
@@ -121,7 +123,8 @@ void statistics::calculateLastIntervalIPsEntropy(std::string filePath, std::chro
             int IPsDstPktsCount = i->second.pktsReceivedTimestamp.size() - indexStartReceived;       
             IPsDstPktsCounts.push_back(IPsDstPktsCount);
             pktsReceived += IPsDstPktsCount;
-        }  
+            } 
+        
        
          for (auto i = IPsSrcPktsCounts.begin(); i != IPsSrcPktsCounts.end(); i++) {
                 IPsSrcProb.push_back((float)*i/pktsSent);
@@ -162,6 +165,28 @@ void statistics::calculateLastIntervalIPsEntropy(std::string filePath, std::chro
       file.close();         
 }
 
+// Aidmar
+/**
+ * Calculate the sending packet rate (packets per second) of each IP in the last time interval.
+ * The rate is appended to the IP's interval_pkt_rate list and used to update its
+ * max_pkt_rate and min_pkt_rate.
+ * @param interval The length of the time interval (in microseconds).
+ * @param intervalStartTimestamp The timestamp where the interval starts.
+ */
+void statistics::calculateLastIntervalPacketRate(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp){
+        for (auto &entry : ip_statistics) {
+                auto &ipStat = entry.second;
+
+                // Count the packets sent since the interval started. getClosestIndex is skipped for
+                // IPs without any sent packet, because an empty timestamp vector is noted elsewhere
+                // in this file as a possible cause of a segmentation fault.
+                int IPsSrcPktsCount = 0;
+                if (!ipStat.pktsSentTimestamp.empty()) {
+                        int indexStartSent = getClosestIndex(ipStat.pktsSentTimestamp, intervalStartTimestamp);
+                        IPsSrcPktsCount = static_cast<int>(ipStat.pktsSentTimestamp.size()) - indexStartSent;
+                }
+
+                // used 10^6 because interval is in microseconds
+                float interval_pkt_rate = static_cast<float>(IPsSrcPktsCount) * 1000000 / interval.count();
+                // Store the computed rate itself (not a placeholder) in the per-interval history.
+                ipStat.interval_pkt_rate.push_back(interval_pkt_rate);
+
+                // NOTE(review): 0 doubles as "not set yet", so a stored minimum of 0 is overwritten by the
+                // next interval's rate - confirm whether idle intervals should count towards min_pkt_rate.
+                if(interval_pkt_rate > ipStat.max_pkt_rate || ipStat.max_pkt_rate == 0)
+                    ipStat.max_pkt_rate = interval_pkt_rate;
+                if(interval_pkt_rate < ipStat.min_pkt_rate || ipStat.min_pkt_rate == 0)
+                    ipStat.min_pkt_rate = interval_pkt_rate;
+        }
+}
 
 // Aidmar - incomplete
 /**
@@ -207,8 +232,8 @@ void statistics::calculateIntervalIPsEntropy(std::chrono::microseconds interval)
 // Aidmar
 /**
  * Calculate cumulative entropy of source and destination IPs; the entropy for packets from the beginning of the pcap file. 
- * The function write the results to filePath_ip_entropy.csv file.
- * @param filePath The PCAP fiel path.
+ * The results are written to filePath_ip_entropy.csv file.
+ * @param filePath The input (background) PCAP file path.
  */
 void statistics::addIPEntropy(std::string filePath){
     std::vector <std::string> IPs; 
@@ -261,7 +286,7 @@ void statistics::addIPEntropy(std::string filePath){
 
 // Aidmar
 /**
- * Increments the packet counter for the given flow.
+ * Increments the packet counter for the given conversation.
  * @param ipAddressSender The sender IP address.
  * @param sport The source port.
  * @param ipAddressReceiver The receiver IP address.
@@ -279,7 +304,7 @@ void statistics::addFlowStat(std::string ipAddressSender,int sport,std::string i
         flow_statistics[f1].pkts_B_A++;
         flow_statistics[f1].pkts_B_A_timestamp.push_back(timestamp);
         if(flow_statistics[f1].pkts_A_B_timestamp.size()>0){
-            flow_statistics[f1].pkts_delay.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - flow_statistics[f1].pkts_A_B_timestamp[flow_statistics[f1].pkts_A_B_timestamp.size()-1]));
+            flow_statistics[f1].pkts_delay.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - flow_statistics[f1].pkts_A_B_timestamp[flow_statistics[f1].pkts_A_B_timestamp.size()-1])); // TO-DO: use .back()
         }
         
         //std::cout<<timestamp.count()<<"::"<<ipAddressReceiver<<":"<<dport<<","<<ipAddressSender<<":"<<sport<<"\n"; 
@@ -290,7 +315,7 @@ void statistics::addFlowStat(std::string ipAddressSender,int sport,std::string i
         flow_statistics[f2].pkts_A_B++;
         flow_statistics[f2].pkts_A_B_timestamp.push_back(timestamp);
          if(flow_statistics[f2].pkts_B_A_timestamp.size()>0){
-            flow_statistics[f2].pkts_delay.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - flow_statistics[f2].pkts_B_A_timestamp[flow_statistics[f2].pkts_B_A_timestamp.size()-1]));
+            flow_statistics[f2].pkts_delay.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - flow_statistics[f2].pkts_B_A_timestamp[flow_statistics[f2].pkts_B_A_timestamp.size()-1])); // TO-DO: use .back()
         }
         //std::cout<<timestamp.count()<<"::"<<ipAddressSender<<":"<<sport<<","<<ipAddressReceiver<<":"<<dport<<"\n"; 
         //std::cout<<flow_statistics[f2].pkts_A_B<<"\n";
@@ -391,6 +416,10 @@ void statistics::addIpStat_packetSent(std::string filePath, std::string ipAddres
         // Add the IP class
         ip_statistics[ipAddressSender].ip_class = getIPv4Class(ipAddressSender);
         
+        // Initialize packet rates
+        ip_statistics[ipAddressSender].max_pkt_rate = 0;
+        ip_statistics[ipAddressSender].min_pkt_rate = 0;
+        
         // Calculate Mahoney anomaly score for ip.src
         float ipSrc_Mahoney_score = 0;
         // s_r: The number of IP sources (the different values)
@@ -429,7 +458,8 @@ void statistics::addIpStat_packetSent(std::string filePath, std::string ipAddres
     std::ofstream file;
     file.open (new_filepath,std::ios_base::app);
     file << ipAddressSender << ","<< s_t << "," << n << "," << s_r << "," << ipSrc_Mahoney_score << "\n";
-    file.close();    
+    file.close();  
+    
     ip_statistics[ipAddressSender].firstAppearAsSenderPktCount = packetCount;  
     ip_statistics[ipAddressSender].sourceAnomalyScore = ipSrc_Mahoney_score;    
     }
@@ -489,6 +519,17 @@ void statistics::addIpStat_packetSent(std::string filePath, std::string ipAddres
     // Aidmar
     ip_statistics[ipAddressSender].pktsSentTimestamp.push_back(timestamp);
     
+    // Aidmar - calculate packet rate (assumption: max_pkt_rate=1/smallest time between two consecutive pkts)
+    // resulting in very big rates, therefore it could be better to calculate pkt rate on time intervals
+    /*if(ip_statistics[ipAddressSender].pktsSentTimestamp.size() > 0){
+    std::chrono::microseconds temp_pkt_consecutive_time = timestamp - ip_statistics[ipAddressSender].pktsSentTimestamp.back();
+    float temp_pkt_rate = (float) 1000000/temp_pkt_consecutive_time.count(); // pkt per sec = 10**6/micro sec
+    if(temp_pkt_rate > ip_statistics[ipAddressSender].max_pkt_rate || ip_statistics[ipAddressSender].max_pkt_rate == 0)
+        ip_statistics[ipAddressSender].max_pkt_rate = temp_pkt_rate;
+    if(temp_pkt_rate < ip_statistics[ipAddressSender].min_pkt_rate || ip_statistics[ipAddressSender].min_pkt_rate == 0)
+        ip_statistics[ipAddressSender].min_pkt_rate = temp_pkt_rate;
+    }*/
+                
     // Update stats for packet receiver
     ip_statistics[ipAddressReceiver].kbytes_received += (float(bytesSent) / 1024);
     ip_statistics[ipAddressReceiver].pkts_received++;  

+ 28 - 1
code_boost/src/cxx/statistics.h

@@ -39,6 +39,7 @@ struct ip_stats {
     long AvgMaxSegmentSizeTCP;
 };
 
+
 // Aidmar
 /*
  * Struct used to represent a flow by:
@@ -140,6 +141,9 @@ struct entry_ipStat {
     float kbytes_sent;
     // Aidmar
     std::string ip_class;
+    std::vector<float> interval_pkt_rate;
+    float max_pkt_rate;
+    float min_pkt_rate;
     // Aidmar - to calculate Mahoney anomaly score
     long firstAppearAsSenderPktCount;
     long firstAppearAsReceiverPktCount;
@@ -154,8 +158,11 @@ struct entry_ipStat {
                && pkts_sent == other.pkts_sent
                && kbytes_sent == other.kbytes_sent
                && kbytes_received == other.kbytes_received
+                // Aidmar
+               && interval_pkt_rate == other.interval_pkt_rate
+               && max_pkt_rate == other.max_pkt_rate
+               && min_pkt_rate == other.min_pkt_rate
                && ip_class == other.ip_class
-               // Aidmar
                && firstAppearAsSenderPktCount == other.firstAppearAsSenderPktCount
                && firstAppearAsReceiverPktCount == other.firstAppearAsReceiverPktCount
                && sourceAnomalyScore == other.sourceAnomalyScore
@@ -165,6 +172,25 @@ struct entry_ipStat {
     }
 };
 
+// Aidmar
+/*
+ * Statistics kept for one interval:
+ * - pkts_count: number of packets
+ * - ip_src_entropy: IP source entropy
+ * - ip_dst_entropy: IP destination entropy
+ */
+struct entry_intervalStat {
+    long pkts_count;
+    float ip_src_entropy;
+    float ip_dst_entropy;
+
+    // Two entries are equal only when all three fields compare equal.
+    bool operator==(const entry_intervalStat &rhs) const {
+        if (pkts_count != rhs.pkts_count)
+            return false;
+        if (ip_src_entropy != rhs.ip_src_entropy)
+            return false;
+        return ip_dst_entropy == rhs.ip_dst_entropy;
+    }
+};
+
+
 // Aidmar
 /*
  * Struct used to represent:
@@ -308,6 +334,7 @@ public:
     void addIPEntropy(std::string filePath);
     void addFlowStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
     void calculateLastIntervalIPsEntropy(std::string filePath, std::chrono::microseconds intervalStartTimestamp);
+    void calculateLastIntervalPacketRate(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp);
 
     void incrementTTLcount(std::string ipAddress, int ttlValue);
 

+ 52 - 2
code_boost/src/cxx/statistics_db.cpp

@@ -30,10 +30,12 @@ void statistics_db::writeStatisticsIP(std::unordered_map<std::string, entry_ipSt
                 "pktsSent INTEGER, "
                 "kbytesReceived REAL, "
                 "kbytesSent REAL, "
+                "maxPktRate REAL,"
+                "minPktRate REAL,"
                 "class TEXT, "
                 "PRIMARY KEY(ipAddress));";
         db->exec(createTable);
-        SQLite::Statement query(*db, "INSERT INTO ip_statistics VALUES (?, ?, ?, ?, ?, ?)");
+        SQLite::Statement query(*db, "INSERT INTO ip_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?)");
         for (auto it = ipStatistics.begin(); it != ipStatistics.end(); ++it) {
             entry_ipStat e = it->second;
             query.bind(1, it->first);
@@ -42,7 +44,9 @@ void statistics_db::writeStatisticsIP(std::unordered_map<std::string, entry_ipSt
             query.bind(4, e.kbytes_received);
             query.bind(5, e.kbytes_sent);
             // Aidmar
-            query.bind(6, e.ip_class);
+            query.bind(6, e.max_pkt_rate);
+            query.bind(7, e.min_pkt_rate);
+            query.bind(8, e.ip_class);
             query.exec();
             query.reset();
         }
@@ -352,3 +356,49 @@ void statistics_db::writeStatisticsFlow(std::unordered_map<flow, entry_flowStat>
         std::cout << "Exception in statistics_db: " << e.what() << std::endl;
     }
 }
+
+// Aidmar
+/**
+ * Writes the interval statistics into the database.
+ * An existing interval_statistics table is dropped and a new one is created and filled
+ * with one row per entry of the given map. Exceptions are caught and reported on stdout.
+ * @param intervalStatistics The interval statistics from class statistics.
+ */
+void statistics_db::writeStatisticsInterval(std::unordered_map<std::string, entry_intervalStat> intervalStatistics){          
+    try {
+        db->exec("DROP TABLE IF EXISTS interval_statistics");
+        SQLite::Transaction transaction(*db);
+        // NOTE(review): what the map key identifies (e.g. an interval timestamp) is not visible
+        // here - confirm and rename the intervalKey column accordingly.
+        const char *createTable = "CREATE TABLE interval_statistics ("
+                "intervalKey TEXT,"
+                "pktsCount INTEGER,"
+                "ipSrcEntropy REAL,"
+                "ipDstEntropy REAL,"
+                "PRIMARY KEY(intervalKey));";
+        db->exec(createTable);
+        SQLite::Statement query(*db, "INSERT INTO interval_statistics VALUES (?, ?, ?, ?)");
+        for (auto it = intervalStatistics.begin(); it != intervalStatistics.end(); ++it) {
+            const entry_intervalStat &e = it->second;
+
+            query.bind(1, it->first);
+            // Cast to int, as the other writers in this file do for packet counters.
+            query.bind(2, static_cast<int>(e.pkts_count));
+            query.bind(3, e.ip_src_entropy);
+            query.bind(4, e.ip_dst_entropy);
+            query.exec();
+            query.reset();
+        }
+        transaction.commit();
+    }
+    catch (std::exception &e) {
+        std::cout << "Exception in statistics_db: " << e.what() << std::endl;
+    }
+}

+ 2 - 1
code_boost/src/cxx/statistics_db.h

@@ -39,10 +39,11 @@ public:
                              std::string timestampLastPkt, float avgPacketRate, float avgPacketSize,
                              float avgPacketsSentPerHost, float avgBandwidthIn, float avgBandwidthOut);
 
-    // Aidmar
+    // Aidmar - new tables
     void writeStatisticsMss_dist(std::unordered_map<ipAddress_mss, int> mssDistribution);
     void writeStatisticsWin(std::unordered_map<ipAddress_win, int> winDistribution);
     void writeStatisticsFlow(std::unordered_map<flow, entry_flowStat> flowStatistics);
+    void writeStatisticsInterval(std::unordered_map<std::string, entry_intervalStat> intervalStatistics);
 
 private:
     // Pointer to the SQLite database