12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751 |
- #include <iostream>
- #include <fstream>
- #include <vector>
- #include <math.h>
- #include <algorithm>
- #include "statistics.h"
- #include <sstream>
- #include <SQLiteCpp/SQLiteCpp.h>
- #include "statistics_db.h"
/**
 * Splits a string into tokens at every occurrence of a delimiter character.
 * Empty tokens between two consecutive delimiters are kept; a delimiter at the very end
 * of the input does not produce a trailing empty token.
 * @param str The string to split.
 * @param delimiter The character that separates the tokens.
 * @return The tokens in their order of appearance (empty for an empty input).
 */
std::vector<std::string> split(std::string str, char delimiter) {
    std::vector<std::string> pieces;
    std::stringstream reader(str);
    for (std::string piece; std::getline(reader, piece, delimiter);) {
        pieces.push_back(piece);
    }
    return pieces;
}
/**
 * Determines the classful-addressing category of an IPv4 address in dotted-decimal notation.
 * Only the first two octets are inspected.
 * @param ipAddress The address to classify, e.g. "192.168.1.1".
 * @return One of "A", "A-private", "A-unused", "B", "B-private", "C", "C-private", "D", "E",
 * or "Unknown" when the first octet is outside 1..254 or the input does not start with
 * "<number>.<number>" (for instance an IPv6 address or malformed text).
 */
std::string getIPv4Class(std::string ipAddress){
    std::string ipClass = "Unknown";

    // NOTE(review): printing every classified address looks like leftover debug output;
    // it is kept so that the program's stdout does not change.
    std::cout << ipAddress << "\n";

    // Parse "<b1>.<b2>" with a stream instead of std::stoi so that non-numeric input
    // results in "Unknown" rather than an exception.
    std::istringstream octets(ipAddress);
    int b1 = 0;
    int b2 = 0;
    char separator = '\0';
    if (!(octets >> b1 >> separator >> b2) || separator != '.') {
        return ipClass;
    }

    if (b1 >= 1 && b1 <= 126) {
        ipClass = (b1 == 10) ? "A-private" : "A";
    } else if (b1 == 127) {
        ipClass = "A-unused";
    } else if (b1 >= 128 && b1 <= 191) {
        // 172.16.0.0 - 172.31.255.255
        ipClass = (b1 == 172 && b2 >= 16 && b2 <= 31) ? "B-private" : "B";
    } else if (b1 >= 192 && b1 <= 223) {
        // 192.168.0.0 - 192.168.255.255
        ipClass = (b1 == 192 && b2 == 168) ? "C-private" : "C";
    } else if (b1 >= 224 && b1 <= 239) {
        ipClass = "D";
    } else if (b1 >= 240 && b1 <= 254) {
        ipClass = "E";
    }

    return ipClass;
}
/**
 * Finds the position of the element whose value is nearest to a reference value.
 * When several elements are equally near, the first of them wins.
 * @param v The timestamps to search.
 * @param refElem The reference timestamp.
 * @return The index of the nearest element; 0 if v is empty.
 */
int getClosestIndex(std::vector<std::chrono::microseconds> v, std::chrono::microseconds refElem)
{
    const auto gapTo = [&refElem](std::chrono::microseconds candidate) {
        return std::abs((candidate - refElem).count());
    };

    std::size_t nearest = 0;
    for (std::size_t pos = 1; pos < v.size(); ++pos) {
        // Strict comparison keeps the earliest element on ties.
        if (gapTo(v[pos]) < gapTo(v[nearest])) {
            nearest = pos;
        }
    }
    return static_cast<int>(nearest);
}
- void statistics::calculateLastIntervalIPsEntropy(std::string filePath, std::chrono::microseconds intervalStartTimestamp){
- std::vector <int> IPsSrcPktsCounts;
- std::vector <int> IPsDstPktsCounts;
-
- std::vector <float> IPsSrcProb;
- std::vector <float> IPsDstProb;
-
- int pktsSent = 0, pktsReceived = 0;
-
- for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
- int indexStartSent = getClosestIndex(i->second.pktsSentTimestamp, intervalStartTimestamp);
- int IPsSrcPktsCount = i->second.pktsSentTimestamp.size() - indexStartSent;
- IPsSrcPktsCounts.push_back(IPsSrcPktsCount);
- pktsSent += IPsSrcPktsCount;
-
-
- int indexStartReceived = getClosestIndex(i->second.pktsReceivedTimestamp, intervalStartTimestamp);
- int IPsDstPktsCount = i->second.pktsReceivedTimestamp.size() - indexStartReceived;
- IPsDstPktsCounts.push_back(IPsDstPktsCount);
- pktsReceived += IPsDstPktsCount;
- }
-
- for (auto i = IPsSrcPktsCounts.begin(); i != IPsSrcPktsCounts.end(); i++) {
- IPsSrcProb.push_back((float)*i/pktsSent);
-
- }
- for (auto i = IPsDstPktsCounts.begin(); i != IPsDstPktsCounts.end(); i++) {
- IPsDstProb.push_back((float)*i/pktsReceived);
-
- }
-
-
- float IPsSrcEntropy = 0;
- for(unsigned i=0; i < IPsSrcProb.size();i++){
- if (IPsSrcProb[i] > 0)
- IPsSrcEntropy += - IPsSrcProb[i]*log2(IPsSrcProb[i]);
- }
-
- float IPsDstEntropy = 0;
- for(unsigned i=0; i < IPsDstProb.size();i++){
- if (IPsDstProb[i] > 0)
- IPsDstEntropy += - IPsDstProb[i]*log2(IPsDstProb[i]);
- }
-
-
- std::string new_filepath = filePath;
- const std::string &newExt = "_ip_entropy_interval.csv";
- std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
- if (h != std::string::npos) {
- new_filepath.replace(h, newExt.length(), newExt);
- } else {
- new_filepath.append(newExt);
- }
-
-
- std::ofstream file;
- file.open (new_filepath,std::ios_base::app);
- file << intervalStartTimestamp.count() << "," << IPsSrcEntropy << "," << IPsDstEntropy << "\n";
- file.close();
- }
- void statistics::addIPEntropy(std::string filePath){
- std::vector <std::string> IPs;
- std::vector <float> IPsSrcProb;
- std::vector <float> IPsDstProb;
- for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
- IPs.push_back(i->first);
- IPsSrcProb.push_back((float)i->second.pkts_sent/packetCount);
- IPsDstProb.push_back((float)i->second.pkts_received/packetCount);
-
-
-
- }
-
-
- float IPsSrcEntropy = 0;
- for(unsigned i=0; i < IPsSrcProb.size();i++){
- if (IPsSrcProb[i] > 0)
- IPsSrcEntropy += - IPsSrcProb[i]*log2(IPsSrcProb[i]);
- }
- std::cout << packetCount << ": SrcEnt: " << IPsSrcEntropy << "\n";
-
-
- float IPsDstEntropy = 0;
- for(unsigned i=0; i < IPsDstProb.size();i++){
- if (IPsDstProb[i] > 0)
- IPsDstEntropy += - IPsDstProb[i]*log2(IPsDstProb[i]);
- }
- std::cout << packetCount << ": DstEnt: " << IPsDstEntropy << "\n";
-
-
- std::ofstream file;
-
-
- std::string new_filepath = filePath;
- const std::string &newExt = "_ip_entropy.csv";
- std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
- if (h != std::string::npos) {
- new_filepath.replace(h, newExt.length(), newExt);
- } else {
- new_filepath.append(newExt);
- }
-
-
- file.open (new_filepath,std::ios_base::app);
- file << packetCount << "," << IPsSrcEntropy << "," << IPsDstEntropy << "\n";
- file.close();
- }
- void statistics::addFlowStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp){
-
-
- flow f1 = {ipAddressReceiver, dport, ipAddressSender, sport};
- flow f2 = {ipAddressSender, sport, ipAddressReceiver, dport};
-
-
- if (flow_statistics.count(f1)>0){
- flow_statistics[f1].pkts_B_A++;
- flow_statistics[f1].pkts_B_A_timestamp.push_back(timestamp);
- if(flow_statistics[f1].pkts_A_B_timestamp.size()>0){
- flow_statistics[f1].pkts_delay.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - flow_statistics[f1].pkts_A_B_timestamp[flow_statistics[f1].pkts_A_B_timestamp.size()-1]));
- }
-
-
-
-
- }
- else{
- flow_statistics[f2].pkts_A_B++;
- flow_statistics[f2].pkts_A_B_timestamp.push_back(timestamp);
- if(flow_statistics[f2].pkts_B_A_timestamp.size()>0){
- flow_statistics[f2].pkts_delay.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - flow_statistics[f2].pkts_B_A_timestamp[flow_statistics[f2].pkts_B_A_timestamp.size()-1]));
- }
-
-
-
- }
- }
-
-
- void statistics::incrementMSScount(std::string ipAddress, int mssValue) {
- mss_distribution[{ipAddress, mssValue}]++;
- }
- void statistics::incrementWinCount(std::string ipAddress, int winSize) {
- win_distribution[{ipAddress, winSize}]++;
- }
- void statistics::incrementTTLcount(std::string ipAddress, int ttlValue) {
- ttl_distribution[{ipAddress, ttlValue}]++;
- }
- void statistics::incrementProtocolCount(std::string ipAddress, std::string protocol) {
- protocol_distribution[{ipAddress, protocol}]++;
- }
- int statistics::getProtocolCount(std::string ipAddress, std::string protocol) {
- return protocol_distribution[{ipAddress, protocol}];
- }
- void statistics::incrementPortCount(std::string ipAddressSender, int outgoingPort, std::string ipAddressReceiver,
- int incomingPort) {
- ip_ports[{ipAddressSender, "out", outgoingPort}]++;
- ip_ports[{ipAddressReceiver, "in", incomingPort}]++;
- }
- statistics::statistics(void) {
- }
- void statistics::assignMacAddress(std::string ipAddress, std::string macAddress) {
- ip_mac_mapping[ipAddress] = macAddress;
- }
- void statistics::addIpStat_packetSent(std::string filePath, std::string ipAddressSender, std::string ipAddressReceiver, long bytesSent, std::chrono::microseconds timestamp) {
-
-
- if(ip_statistics[ipAddressSender].pkts_sent==0){
-
- ip_statistics[ipAddressSender].ip_class = getIPv4Class(ipAddressSender);
-
-
- float ipSrc_Mahoney_score = 0;
-
-
-
- int s_t = 0, n = 0, s_r = 0;
- for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
- if (i->second.pkts_sent > 0)
- s_r++;
- }
- if(s_r > 0){
-
- n = packetCount;
-
- int pktCntNvlSndr = 0;
- for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
- if (pktCntNvlSndr < i->second.firstAppearAsSenderPktCount)
- pktCntNvlSndr = i->second.firstAppearAsSenderPktCount;
- }
-
- s_t = packetCount - pktCntNvlSndr + 1;
- ipSrc_Mahoney_score = (float)s_t*n/s_r;
- }
-
-
- std::string new_filepath = filePath;
- const std::string &newExt = "_ip_src_anomaly_score.csv";
- std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
- if (h != std::string::npos) {
- new_filepath.replace(h, newExt.length(), newExt);
- } else {
- new_filepath.append(newExt);
- }
-
-
- std::ofstream file;
- file.open (new_filepath,std::ios_base::app);
- file << ipAddressSender << ","<< s_t << "," << n << "," << s_r << "," << ipSrc_Mahoney_score << "\n";
- file.close();
- ip_statistics[ipAddressSender].firstAppearAsSenderPktCount = packetCount;
- ip_statistics[ipAddressSender].sourceAnomalyScore = ipSrc_Mahoney_score;
- }
-
-
- if(ip_statistics[ipAddressReceiver].pkts_received==0){
-
- ip_statistics[ipAddressReceiver].ip_class = getIPv4Class(ipAddressReceiver);
-
-
- float ipDst_Mahoney_score = 0;
-
-
-
- int s_t = 0, n = 0, s_r = 0;
- for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
- if (i->second.pkts_received > 0)
- s_r++;
- }
- if(s_r > 0){
-
- n = packetCount;
-
- int pktCntNvlRcvr = 0;
- for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
- if (pktCntNvlRcvr < i->second.firstAppearAsReceiverPktCount)
- pktCntNvlRcvr = i->second.firstAppearAsReceiverPktCount;
- }
-
- s_t = packetCount - pktCntNvlRcvr + 1;
-
- ipDst_Mahoney_score = (float)s_t*n/s_r;
- }
-
-
- std::string new_filepath = filePath;
- const std::string &newExt = "_ip_dst_anomaly_score.csv";
- std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
- if (h != std::string::npos) {
- new_filepath.replace(h, newExt.length(), newExt);
- } else {
- new_filepath.append(newExt);
- }
-
-
- std::ofstream file;
- file.open (new_filepath,std::ios_base::app);
- file << ipAddressReceiver << ","<< s_t << "," << n << "," << s_r << "," << ipDst_Mahoney_score << "\n";
- file.close();
- ip_statistics[ipAddressReceiver].firstAppearAsReceiverPktCount = packetCount;
- ip_statistics[ipAddressReceiver].destinationAnomalyScore = ipDst_Mahoney_score;
- }
-
-
- ip_statistics[ipAddressSender].kbytes_sent += (float(bytesSent) / 1024);
- ip_statistics[ipAddressSender].pkts_sent++;
-
- ip_statistics[ipAddressSender].pktsSentTimestamp.push_back(timestamp);
-
-
- ip_statistics[ipAddressReceiver].kbytes_received += (float(bytesSent) / 1024);
- ip_statistics[ipAddressReceiver].pkts_received++;
-
- ip_statistics[ipAddressReceiver].pktsReceivedTimestamp.push_back(timestamp);
- }
- void statistics::addMSS(std::string ipAddress, int MSSvalue) {
- ip_sumMss[ipAddress] += MSSvalue;
- }
- void statistics::setTimestampFirstPacket(Tins::Timestamp ts) {
- timestamp_firstPacket = ts;
- }
- void statistics::setTimestampLastPacket(Tins::Timestamp ts) {
- timestamp_lastPacket = ts;
- }
- Tins::Timestamp statistics::getTimestampFirstPacket() {
- return timestamp_firstPacket;
- }
- Tins::Timestamp statistics::getTimestampLastPacket() {
- return timestamp_lastPacket;
- }
- std::string statistics::getCaptureDurationTimestamp() const {
-
- time_t t = (timestamp_lastPacket.seconds() - timestamp_firstPacket.seconds());
- time_t ms = (timestamp_lastPacket.microseconds() - timestamp_firstPacket.microseconds());
- long int hour = t / 3600;
- long int remainder = (t - hour * 3600);
- long int minute = remainder / 60;
- long int second = (remainder - minute * 60) % 60;
- long int microseconds = ms;
-
- char out[64];
- sprintf(out, "%02ld:%02ld:%02ld.%06ld ", hour, minute, second, microseconds);
- return std::string(out);
- }
- float statistics::getCaptureDurationSeconds() const {
- timeval d;
- d.tv_sec = timestamp_lastPacket.seconds() - timestamp_firstPacket.seconds();
- d.tv_usec = timestamp_lastPacket.microseconds() - timestamp_firstPacket.microseconds();
- char tmbuf[64], buf[64];
- auto nowtm = localtime(&(d.tv_sec));
- strftime(tmbuf, sizeof(tmbuf), "%S", nowtm);
- snprintf(buf, sizeof(buf), "%s.%06u", tmbuf, (uint) d.tv_usec);
- return std::stof(std::string(buf));
- }
- std::string statistics::getFormattedTimestamp(time_t seconds, suseconds_t microseconds) const {
- timeval tv;
- tv.tv_sec = seconds;
- tv.tv_usec = microseconds;
- char tmbuf[64], buf[64];
- auto nowtm = localtime(&(tv.tv_sec));
- strftime(tmbuf, sizeof(tmbuf), "%Y-%m-%d %H:%M:%S", nowtm);
- snprintf(buf, sizeof(buf), "%s.%06u", tmbuf, (uint) tv.tv_usec);
- return std::string(buf);
- }
- ip_stats statistics::getStatsForIP(std::string ipAddress) {
- float duration = getCaptureDurationSeconds();
- entry_ipStat ipStatEntry = ip_statistics[ipAddress];
- ip_stats s;
- s.bandwidthKBitsIn = (ipStatEntry.kbytes_received / duration) * 8;
- s.bandwidthKBitsOut = (ipStatEntry.kbytes_sent / duration) * 8;
- s.packetPerSecondIn = (ipStatEntry.pkts_received / duration);
- s.packetPerSecondOut = (ipStatEntry.pkts_sent / duration);
- s.AvgPacketSizeSent = (ipStatEntry.kbytes_sent / ipStatEntry.pkts_sent);
- s.AvgPacketSizeRecv = (ipStatEntry.kbytes_received / ipStatEntry.pkts_received);
- int sumMSS = ip_sumMss[ipAddress];
- int tcpPacketsSent = getProtocolCount(ipAddress, "TCP");
- s.AvgMaxSegmentSizeTCP = ((sumMSS > 0 && tcpPacketsSent > 0) ? (sumMSS / tcpPacketsSent) : 0);
- return s;
- }
- void statistics::incrementPacketCount() {
- packetCount++;
- }
- void statistics::printStats(std::string ipAddress) {
- std::stringstream ss;
- ss << std::endl;
- ss << "Capture duration: " << getCaptureDurationSeconds() << " seconds" << std::endl;
- ss << "Capture duration (HH:MM:SS.mmmmmm): " << getCaptureDurationTimestamp() << std::endl;
- ss << "#Packets: " << packetCount << std::endl;
- ss << std::endl;
-
- if (ipAddress != "") {
- entry_ipStat e = ip_statistics[ipAddress];
- ss << "\n----- STATS FOR IP ADDRESS [" << ipAddress << "] -------" << std::endl;
- ss << std::endl << "KBytes sent: " << e.kbytes_sent << std::endl;
- ss << "KBytes received: " << e.kbytes_received << std::endl;
- ss << "Packets sent: " << e.pkts_sent << std::endl;
- ss << "Packets received: " << e.pkts_received << "\n\n";
- ip_stats is = getStatsForIP(ipAddress);
- ss << "Bandwidth IN: " << is.bandwidthKBitsIn << " kbit/s" << std::endl;
- ss << "Bandwidth OUT: " << is.bandwidthKBitsOut << " kbit/s" << std::endl;
- ss << "Packets per second IN: " << is.packetPerSecondIn << std::endl;
- ss << "Packets per second OUT: " << is.packetPerSecondOut << std::endl;
- ss << "Avg Packet Size Sent: " << is.AvgPacketSizeSent << " kbytes" << std::endl;
- ss << "Avg Packet Size Received: " << is.AvgPacketSizeRecv << " kbytes" << std::endl;
- ss << "Avg MSS: " << is.AvgMaxSegmentSizeTCP << " bytes" << std::endl;
- }
- std::cout << ss.str();
- }
- void statistics::writeToDatabase(std::string database_path) {
-
- float duration = getCaptureDurationSeconds();
- long sumPacketsSent = 0, senderCountIP = 0;
- float sumBandwidthIn = 0.0, sumBandwidthOut = 0.0;
- for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
- sumPacketsSent += i->second.pkts_sent;
-
- sumBandwidthIn += (i->second.kbytes_received / duration);
- sumBandwidthOut += (i->second.kbytes_sent / duration);
- senderCountIP++;
- }
- float avgPacketRate = (packetCount / duration);
- long avgPacketSize = getAvgPacketSize();
- long avgPacketsSentPerHost = (sumPacketsSent / senderCountIP);
- float avgBandwidthInKBits = (sumBandwidthIn / senderCountIP) * 8;
- float avgBandwidthOutInKBits = (sumBandwidthOut / senderCountIP) * 8;
-
- statistics_db db(database_path);
- db.writeStatisticsFile(packetCount, getCaptureDurationSeconds(),
- getFormattedTimestamp(timestamp_firstPacket.seconds(), timestamp_firstPacket.microseconds()),
- getFormattedTimestamp(timestamp_lastPacket.seconds(), timestamp_lastPacket.microseconds()),
- avgPacketRate, avgPacketSize, avgPacketsSentPerHost, avgBandwidthInKBits,
- avgBandwidthOutInKBits);
- db.writeStatisticsIP(ip_statistics);
- db.writeStatisticsTTL(ttl_distribution);
- db.writeStatisticsIpMac(ip_mac_mapping);
- db.writeStatisticsMss(ip_sumMss);
- db.writeStatisticsPorts(ip_ports);
- db.writeStatisticsProtocols(protocol_distribution);
-
- db.writeStatisticsMss_dist(mss_distribution);
- db.writeStatisticsWin(win_distribution);
- db.writeStatisticsFlow(flow_statistics);
- }
- float statistics::getAvgPacketSize() const {
-
- return (sumPacketSize / packetCount) / 1024;
- }
- void statistics::addPacketSize(uint32_t packetSize) {
- sumPacketSize += ((float) packetSize);
- }
|