// statistics.cpp (19 KB)
  1. // Aidmar
  #include <cstdio>
  #include <ctime>
  #include <fstream>
  #include <iostream>
  #include <math.h>
  #include <sstream>
  #include <string>
  #include <vector>
  #include "statistics.h"
  #include <SQLiteCpp/SQLiteCpp.h>
  #include "statistics_db.h"
  10. // Aidmar
  11. void statistics::addIPEntropy(){
  12. std::vector <std::string> IPs;
  13. std::vector <float> IPsSrcProb;
  14. std::vector <float> IPsDstProb;
  15. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  16. IPs.push_back(i->first);
  17. IPsSrcProb.push_back((float)i->second.pkts_sent/packetCount);
  18. IPsDstProb.push_back((float)i->second.pkts_received/packetCount);
  19. /*std::cout << i->first << ":" << i->second.pkts_sent << ":" << i->second.pkts_received << ":"
  20. << i->second.firstAppearAsSenderPktCount << ":" << i->second.firstAppearAsReceiverPktCount << ":"
  21. << packetCount << "\n";*/
  22. }
  23. // Calculate IP source entropy
  24. float IPsSrcEntropy = 0;
  25. for(unsigned i=0; i < IPsSrcProb.size();i++){
  26. if (IPsSrcProb[i] > 0)
  27. IPsSrcEntropy += - IPsSrcProb[i]*log2(IPsSrcProb[i]);
  28. }
  29. std::cout << packetCount << ": SrcEnt: " << IPsSrcEntropy << "\n";
  30. // Calculate IP destination entropy
  31. float IPsDstEntropy = 0;
  32. for(unsigned i=0; i < IPsDstProb.size();i++){
  33. if (IPsDstProb[i] > 0)
  34. IPsDstEntropy += - IPsDstProb[i]*log2(IPsDstProb[i]);
  35. }
  36. std::cout << packetCount << ": DstEnt: " << IPsDstEntropy << "\n";
  37. /*
  38. // Calculate IP source tn/r anomaly score
  39. float ipSrc_Mahoney_score = 0;
  40. // The number of IP sources (the different values)
  41. int s_r = 0;
  42. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  43. if (i->second.pkts_sent > 0)
  44. s_r++;
  45. }
  46. if(s_r > 0){
  47. // The number of the total instances
  48. int n = packetCount;
  49. // The packet count when the last novel IP was added as a sender
  50. int pktCntNvlSndr = 0;
  51. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  52. if (pktCntNvlSndr < i->second.firstAppearAsSenderPktCount)
  53. pktCntNvlSndr = i->second.firstAppearAsSenderPktCount;
  54. }
  55. // The "time" since last anomalous (novel) IP was appeared
  56. int s_t = packetCount - pktCntNvlSndr + 1;
  57. ipSrc_Mahoney_score = (float)s_t*n/s_r;
  58. std::cout << s_t << ":" << n << ":" << s_r << "\n";
  59. std::cout << packetCount << ": Mahoney score: " << ipSrc_Mahoney_score << "\n";
  60. }
  61. // Calculate IP destination tn/r anomaly score
  62. float ipDst_Mahoney_score = 0;
  63. // The number of IP sources (the different values)
  64. int d_r = 0;
  65. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  66. if (i->second.pkts_received > 0)
  67. d_r++;
  68. }
  69. if(d_r > 0){
  70. // The number of the total instances
  71. int n = packetCount;
  72. // The packet count when the last novel IP was added as a sender
  73. int pktCntNvlRcvr = 0;
  74. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  75. if (pktCntNvlRcvr < i->second.firstAppearAsReceiverPktCount)
  76. pktCntNvlRcvr = i->second.firstAppearAsReceiverPktCount;
  77. }
  78. // The "time" since last anomalous (novel) IP was appeared
  79. int d_t = packetCount - pktCntNvlRcvr + 1;
  80. ipDst_Mahoney_score = (float)d_t*n/d_r;
  81. std::cout << d_t << ":" << n << ":" << d_r << "\n";
  82. std::cout << packetCount << ": Anomaly score: " << ipDst_Mahoney_score << "\n";
  83. }
  84. */
  85. // Write stats to file
  86. std::ofstream file;
  87. file.open ("ip_entropy.csv",std::ios_base::app);
  88. file << packetCount << "," << IPsSrcEntropy << "," << IPsDstEntropy << "\n";
  89. file.close();
  90. }
  91. // Aidmar
  92. /**
  93. * Increments the packet counter for the given IP address and MSS value.
  94. * @param ipAddress The IP address whose MSS packet counter should be incremented.
  95. * @param mssValue The MSS value of the packet.
  96. */
  97. void statistics::incrementMSScount(std::string ipAddress, int mssValue) {
  98. mss_distribution[{ipAddress, mssValue}]++;
  99. }
  100. // Aidmar
  101. /**
  102. * Increments the packet counter for the given IP address and window size.
  103. * @param ipAddress The IP address whose window size packet counter should be incremented.
  104. * @param winSize The window size of the packet.
  105. */
  106. void statistics::incrementWinCount(std::string ipAddress, int winSize) {
  107. win_distribution[{ipAddress, winSize}]++;
  108. }
  109. /**
  110. * Increments the packet counter for the given IP address and TTL value.
  111. * @param ipAddress The IP address whose TTL packet counter should be incremented.
  112. * @param ttlValue The TTL value of the packet.
  113. */
  114. void statistics::incrementTTLcount(std::string ipAddress, int ttlValue) {
  115. ttl_distribution[{ipAddress, ttlValue}]++;
  116. }
  117. /**
  118. * Increments the protocol counter for the given IP address and protocol.
  119. * @param ipAddress The IP address whose protocol packet counter should be incremented.
  120. * @param protocol The protocol of the packet.
  121. */
  122. void statistics::incrementProtocolCount(std::string ipAddress, std::string protocol) {
  123. protocol_distribution[{ipAddress, protocol}]++;
  124. }
  125. /**
  126. * Returns the number of packets seen for the given IP address and protocol.
  127. * @param ipAddress The IP address whose packet count is wanted.
  128. * @param protocol The protocol whose packet count is wanted.
  129. * @return an integer: the number of packets
  130. */
  131. int statistics::getProtocolCount(std::string ipAddress, std::string protocol) {
  132. return protocol_distribution[{ipAddress, protocol}];
  133. }
  134. /**
  135. * Increments the packet counter for
  136. * - the given sender IP address with outgoing port and
  137. * - the given receiver IP address with incoming port.
  138. * @param ipAddressSender The IP address of the packet sender.
  139. * @param outgoingPort The port used by the sender.
  140. * @param ipAddressReceiver The IP address of the packet receiver.
  141. * @param incomingPort The port used by the receiver.
  142. */
  143. void statistics::incrementPortCount(std::string ipAddressSender, int outgoingPort, std::string ipAddressReceiver,
  144. int incomingPort) {
  145. ip_ports[{ipAddressSender, "out", outgoingPort}]++;
  146. ip_ports[{ipAddressReceiver, "in", incomingPort}]++;
  147. }
  148. /**
  149. * Creates a new statistics object.
  150. */
  151. statistics::statistics(void) {
  152. }
  153. /**
  154. * Stores the assignment IP address -> MAC address.
  155. * @param ipAddress The IP address belonging to the given MAC address.
  156. * @param macAddress The MAC address belonging to the given IP address.
  157. */
  158. void statistics::assignMacAddress(std::string ipAddress, std::string macAddress) {
  159. ip_mac_mapping[ipAddress] = macAddress;
  160. }
  161. /**
  162. * Registers statistical data for a sent packet. Increments the counter packets_sent for the sender and
  163. * packets_received for the receiver. Adds the bytes as kbytes_sent (sender) and kybtes_received (receiver).
  164. * @param ipAddressSender The IP address of the packet sender.
  165. * @param ipAddressReceiver The IP address of the packet receiver.
  166. * @param bytesSent The packet's size.
  167. */
  168. void statistics::addIpStat_packetSent(std::string ipAddressSender, std::string ipAddressReceiver, long bytesSent) {
  169. // Aidmar - Adding IP as a sender for first time
  170. if(ip_statistics[ipAddressSender].pkts_sent==0){
  171. // Caculate Mahoney anomaly score for ip.src
  172. float ipSrc_Mahoney_score = 0;
  173. // s_r: The number of IP sources (the different values)
  174. // n: The number of the total instances
  175. // s_t: The "time" since last anomalous (novel) IP was appeared
  176. int s_t = 0, n = 0, s_r = 0;
  177. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  178. if (i->second.pkts_sent > 0)
  179. s_r++;
  180. }
  181. if(s_r > 0){
  182. // The number of the total instances
  183. n = packetCount;
  184. // The packet count when the last novel IP was added as a sender
  185. int pktCntNvlSndr = 0;
  186. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  187. if (pktCntNvlSndr < i->second.firstAppearAsSenderPktCount)
  188. pktCntNvlSndr = i->second.firstAppearAsSenderPktCount;
  189. }
  190. // The "time" since last anomalous (novel) IP was appeared
  191. s_t = packetCount - pktCntNvlSndr + 1;
  192. ipSrc_Mahoney_score = (float)s_t*n/s_r;
  193. }
  194. // Write stats to file
  195. std::ofstream file;
  196. file.open ("ip_src_anomaly_score.csv",std::ios_base::app);
  197. file << ipAddressSender << ","<< s_t << "," << n << "," << s_r << "," << ipSrc_Mahoney_score << "\n";
  198. file.close();
  199. ip_statistics[ipAddressSender].firstAppearAsSenderPktCount = packetCount;
  200. ip_statistics[ipAddressSender].sourceAnomalyScore = ipSrc_Mahoney_score;
  201. }
  202. // Aidmar - Adding IP as a receiver for first time
  203. if(ip_statistics[ipAddressReceiver].pkts_received==0){
  204. // Caculate Mahoney anomaly score for ip.dst
  205. float ipDst_Mahoney_score = 0;
  206. // s_r: The number of IP sources (the different values)
  207. // n: The number of the total instances
  208. // s_t: The "time" since last anomalous (novel) IP was appeared
  209. int s_t = 0, n = 0, s_r = 0;
  210. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  211. if (i->second.pkts_received > 0)
  212. s_r++;
  213. }
  214. if(s_r > 0){
  215. // The number of the total instances
  216. n = packetCount;
  217. // The packet count when the last novel IP was added as a sender
  218. int pktCntNvlRcvr = 0;
  219. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  220. if (pktCntNvlRcvr < i->second.firstAppearAsReceiverPktCount)
  221. pktCntNvlRcvr = i->second.firstAppearAsReceiverPktCount;
  222. }
  223. // The "time" since last anomalous (novel) IP was appeared
  224. s_t = packetCount - pktCntNvlRcvr + 1;
  225. ipDst_Mahoney_score = (float)s_t*n/s_r;
  226. }
  227. // Write stats to file
  228. std::ofstream file;
  229. file.open ("ip_dst_anomaly_score.csv",std::ios_base::app);
  230. file << ipAddressReceiver << ","<< s_t << "," << n << "," << s_r << "," << ipDst_Mahoney_score << "\n";
  231. file.close();
  232. ip_statistics[ipAddressReceiver].firstAppearAsReceiverPktCount = packetCount;
  233. ip_statistics[ipAddressReceiver].destinationAnomalyScore = ipDst_Mahoney_score;
  234. }
  235. // Update stats for packet sender
  236. ip_statistics[ipAddressSender].kbytes_sent += (float(bytesSent) / 1024);
  237. ip_statistics[ipAddressSender].pkts_sent++;
  238. // Update stats for packet receiver
  239. ip_statistics[ipAddressReceiver].kbytes_received += (float(bytesSent) / 1024);
  240. ip_statistics[ipAddressReceiver].pkts_received++;
  241. }
  242. /**
  243. * Registers a value of the TCP option Maximum Segment Size (MSS).
  244. * @param ipAddress The IP address which sent the TCP packet.
  245. * @param MSSvalue The MSS value found.
  246. */
  247. void statistics::addMSS(std::string ipAddress, int MSSvalue) {
  248. ip_sumMss[ipAddress] += MSSvalue;
  249. }
  250. /**
  251. * Setter for the timestamp_firstPacket field.
  252. * @param ts The timestamp of the first packet in the PCAP file.
  253. */
  254. void statistics::setTimestampFirstPacket(Tins::Timestamp ts) {
  255. timestamp_firstPacket = ts;
  256. }
  257. /**
  258. * Setter for the timestamp_lastPacket field.
  259. * @param ts The timestamp of the last packet in the PCAP file.
  260. */
  261. void statistics::setTimestampLastPacket(Tins::Timestamp ts) {
  262. timestamp_lastPacket = ts;
  263. }
  264. /**
  265. * Calculates the capture duration.
  266. * @return a formatted string HH:MM:SS.mmmmmm with
  267. * HH: hour, MM: minute, SS: second, mmmmmm: microseconds
  268. */
  269. std::string statistics::getCaptureDurationTimestamp() const {
  270. // Calculate duration
  271. time_t t = (timestamp_lastPacket.seconds() - timestamp_firstPacket.seconds());
  272. time_t ms = (timestamp_lastPacket.microseconds() - timestamp_firstPacket.microseconds());
  273. long int hour = t / 3600;
  274. long int remainder = (t - hour * 3600);
  275. long int minute = remainder / 60;
  276. long int second = (remainder - minute * 60) % 60;
  277. long int microseconds = ms;
  278. // Build desired output format: YYYY-mm-dd hh:mm:ss
  279. char out[64];
  280. sprintf(out, "%02ld:%02ld:%02ld.%06ld ", hour, minute, second, microseconds);
  281. return std::string(out);
  282. }
  283. /**
  284. * Calculates the capture duration.
  285. * @return a formatted string SS.mmmmmm with
  286. * S: seconds (UNIX time), mmmmmm: microseconds
  287. */
  288. float statistics::getCaptureDurationSeconds() const {
  289. timeval d;
  290. d.tv_sec = timestamp_lastPacket.seconds() - timestamp_firstPacket.seconds();
  291. d.tv_usec = timestamp_lastPacket.microseconds() - timestamp_firstPacket.microseconds();
  292. char tmbuf[64], buf[64];
  293. auto nowtm = localtime(&(d.tv_sec));
  294. strftime(tmbuf, sizeof(tmbuf), "%S", nowtm);
  295. snprintf(buf, sizeof(buf), "%s.%06u", tmbuf, (uint) d.tv_usec);
  296. return std::stof(std::string(buf));
  297. }
  298. /**
  299. * Creates a timestamp based on a time_t seconds (UNIX time format) and microseconds.
  300. * @param seconds
  301. * @param microseconds
  302. * @return a formatted string Y-m-d H:M:S.m with
  303. * Y: year, m: month, d: day, H: hour, M: minute, S: second, m: microseconds
  304. */
  305. std::string statistics::getFormattedTimestamp(time_t seconds, suseconds_t microseconds) const {
  306. timeval tv;
  307. tv.tv_sec = seconds;
  308. tv.tv_usec = microseconds;
  309. char tmbuf[64], buf[64];
  310. auto nowtm = localtime(&(tv.tv_sec));
  311. strftime(tmbuf, sizeof(tmbuf), "%Y-%m-%d %H:%M:%S", nowtm);
  312. snprintf(buf, sizeof(buf), "%s.%06u", tmbuf, (uint) tv.tv_usec);
  313. return std::string(buf);
  314. }
  315. /**
  316. * Calculates the statistics for a given IP address.
  317. * @param ipAddress The IP address whose statistics should be calculated.
  318. * @return a ip_stats struct containing statistical data derived by the statistical data collected.
  319. */
  320. ip_stats statistics::getStatsForIP(std::string ipAddress) {
  321. float duration = getCaptureDurationSeconds();
  322. entry_ipStat ipStatEntry = ip_statistics[ipAddress];
  323. ip_stats s;
  324. s.bandwidthKBitsIn = (ipStatEntry.kbytes_received / duration) * 8;
  325. s.bandwidthKBitsOut = (ipStatEntry.kbytes_sent / duration) * 8;
  326. s.packetPerSecondIn = (ipStatEntry.pkts_received / duration);
  327. s.packetPerSecondOut = (ipStatEntry.pkts_sent / duration);
  328. s.AvgPacketSizeSent = (ipStatEntry.kbytes_sent / ipStatEntry.pkts_sent);
  329. s.AvgPacketSizeRecv = (ipStatEntry.kbytes_received / ipStatEntry.pkts_received);
  330. int sumMSS = ip_sumMss[ipAddress];
  331. int tcpPacketsSent = getProtocolCount(ipAddress, "TCP");
  332. s.AvgMaxSegmentSizeTCP = ((sumMSS > 0 && tcpPacketsSent > 0) ? (sumMSS / tcpPacketsSent) : 0);
  333. return s;
  334. }
  335. /**
  336. * Increments the packet counter.
  337. */
  338. void statistics::incrementPacketCount() {
  339. packetCount++;
  340. }
  341. /**
  342. * Prints the statistics of the PCAP and IP specific statistics for the given IP address.
  343. * @param ipAddress The IP address whose statistics should be printed. Can be empty "" to print only general file statistics.
  344. */
  345. void statistics::printStats(std::string ipAddress) {
  346. std::stringstream ss;
  347. ss << std::endl;
  348. ss << "Capture duration: " << getCaptureDurationSeconds() << " seconds" << std::endl;
  349. ss << "Capture duration (HH:MM:SS.mmmmmm): " << getCaptureDurationTimestamp() << std::endl;
  350. ss << "#Packets: " << packetCount << std::endl;
  351. ss << std::endl;
  352. // Print IP address specific statistics only if IP address was given
  353. if (ipAddress != "") {
  354. entry_ipStat e = ip_statistics[ipAddress];
  355. ss << "\n----- STATS FOR IP ADDRESS [" << ipAddress << "] -------" << std::endl;
  356. ss << std::endl << "KBytes sent: " << e.kbytes_sent << std::endl;
  357. ss << "KBytes received: " << e.kbytes_received << std::endl;
  358. ss << "Packets sent: " << e.pkts_sent << std::endl;
  359. ss << "Packets received: " << e.pkts_received << "\n\n";
  360. ip_stats is = getStatsForIP(ipAddress);
  361. ss << "Bandwidth IN: " << is.bandwidthKBitsIn << " kbit/s" << std::endl;
  362. ss << "Bandwidth OUT: " << is.bandwidthKBitsOut << " kbit/s" << std::endl;
  363. ss << "Packets per second IN: " << is.packetPerSecondIn << std::endl;
  364. ss << "Packets per second OUT: " << is.packetPerSecondOut << std::endl;
  365. ss << "Avg Packet Size Sent: " << is.AvgPacketSizeSent << " kbytes" << std::endl;
  366. ss << "Avg Packet Size Received: " << is.AvgPacketSizeRecv << " kbytes" << std::endl;
  367. ss << "Avg MSS: " << is.AvgMaxSegmentSizeTCP << " bytes" << std::endl;
  368. }
  369. std::cout << ss.str();
  370. }
  371. /**
  372. * Derives general PCAP file statistics from the collected statistical data and
  373. * writes all data into a SQLite database, located at database_path.
  374. * @param database_path The path of the SQLite database file ending with .sqlite3.
  375. */
  376. void statistics::writeToDatabase(std::string database_path) {
  377. // Generate general file statistics
  378. float duration = getCaptureDurationSeconds();
  379. long sumPacketsSent = 0, senderCountIP = 0;
  380. float sumBandwidthIn = 0.0, sumBandwidthOut = 0.0;
  381. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  382. sumPacketsSent += i->second.pkts_sent;
  383. // Consumed bandwith (bytes) for sending packets
  384. sumBandwidthIn += (i->second.kbytes_received / duration);
  385. sumBandwidthOut += (i->second.kbytes_sent / duration);
  386. senderCountIP++;
  387. }
  388. float avgPacketRate = (packetCount / duration);
  389. long avgPacketSize = getAvgPacketSize();
  390. long avgPacketsSentPerHost = (sumPacketsSent / senderCountIP);
  391. float avgBandwidthInKBits = (sumBandwidthIn / senderCountIP) * 8;
  392. float avgBandwidthOutInKBits = (sumBandwidthOut / senderCountIP) * 8;
  393. // Create database and write information
  394. statistics_db db(database_path);
  395. db.writeStatisticsFile(packetCount, getCaptureDurationSeconds(),
  396. getFormattedTimestamp(timestamp_firstPacket.seconds(), timestamp_firstPacket.microseconds()),
  397. getFormattedTimestamp(timestamp_lastPacket.seconds(), timestamp_lastPacket.microseconds()),
  398. avgPacketRate, avgPacketSize, avgPacketsSentPerHost, avgBandwidthInKBits,
  399. avgBandwidthOutInKBits);
  400. db.writeStatisticsIP(ip_statistics);
  401. db.writeStatisticsTTL(ttl_distribution);
  402. db.writeStatisticsIpMac(ip_mac_mapping);
  403. db.writeStatisticsMss(ip_sumMss);
  404. db.writeStatisticsPorts(ip_ports);
  405. db.writeStatisticsProtocols(protocol_distribution);
  406. // Aidmar
  407. db.writeStatisticsMss_dist(mss_distribution);
  408. db.writeStatisticsWin(win_distribution);
  409. }
  410. /**
  411. * Returns the average packet size.
  412. * @return a float indicating the average packet size in kbytes.
  413. */
  414. float statistics::getAvgPacketSize() const {
  415. // AvgPktSize = (Sum of all packet sizes / #Packets)
  416. return (sumPacketSize / packetCount) / 1024;
  417. }
  418. /**
  419. * Adds the size of a packet (to be used to calculate the avg. packet size).
  420. * @param packetSize The size of the current packet in bytes.
  421. */
  422. void statistics::addPacketSize(uint32_t packetSize) {
  423. sumPacketSize += ((float) packetSize);
  424. }