// statistics.cpp
  // Aidmar
  #include <algorithm>
  #include <chrono>
  #include <cstdio>
  #include <cstdlib>
  #include <exception>
  #include <fstream>
  #include <iostream>
  #include <iterator>
  #include <math.h>
  #include <sstream>
  #include <string>
  #include <utility>
  #include <vector>
  #include <SQLiteCpp/SQLiteCpp.h>
  #include "statistics.h"
  #include "statistics_db.h"
  // Aidmar
  /**
   * Splits a string into the substrings separated by a delimiter character.
   * Tokenisation follows std::getline: empty fields between consecutive delimiters are kept,
   * a trailing delimiter does not produce a final empty field, and an empty input
   * yields an empty vector.
   * @param str string to be split
   * @param delimiter delimiter character to split at
   * @return vector of substrings, in their original order
   */
  std::vector<std::string> split(std::string str, char delimiter) {
      std::vector<std::string> tokens;
      std::istringstream stream(str); // Turn the string into a stream.
      std::string token;
      while (std::getline(stream, token, delimiter)) {
          // getline overwrites token on the next iteration, so it is safe to move from it.
          tokens.push_back(std::move(token));
      }
      return tokens;
  }
  // Aidmar
  /**
   * Gets the class (A, B, C, D, E) of an IPv4 address given in dotted notation.
   * Only the first two octets are inspected. Private ranges 10.x, 172.16-31.x and 192.168.x
   * are reported as "A-private", "B-private" and "C-private"; 127.x is reported as "A-unused".
   * @param ipAddress the IP address whose class is wanted
   * @return the class label, or "Unknown" if the text contains no '.', if either of the first
   * two octets cannot be parsed as a number, or if the first octet is outside 1..254
   */
  std::string getIPv4Class(std::string ipAddress){
      const std::string::size_type firstDot = ipAddress.find('.');
      if (firstDot == std::string::npos)
          return "Unknown"; // e.g. an IPv6 address

      const std::string::size_type secondDot = ipAddress.find('.', firstDot + 1);
      const std::string firstOctet = ipAddress.substr(0, firstDot);
      const std::string secondOctet = (secondDot == std::string::npos)
          ? ipAddress.substr(firstDot + 1)
          : ipAddress.substr(firstDot + 1, secondDot - firstDot - 1);

      int b1 = 0;
      int b2 = 0;
      try {
          b1 = std::stoi(firstOctet);
          b2 = std::stoi(secondOctet);
      } catch (const std::exception &) {
          // std::stoi throws on empty or non-numeric octets; such input has no class.
          return "Unknown";
      }

      if (b1 >= 1 && b1 <= 126)
          return (b1 == 10) ? "A-private" : "A";
      if (b1 == 127)
          return "A-unused"; // cannot be used and is reserved for loopback and diagnostic functions.
      if (b1 >= 128 && b1 <= 191)
          return (b1 == 172 && b2 >= 16 && b2 <= 31) ? "B-private" : "B";
      if (b1 >= 192 && b1 <= 223)
          return (b1 == 192 && b2 == 168) ? "C-private" : "C";
      if (b1 >= 224 && b1 <= 239)
          return "D"; // Reserved for Multicasting
      if (b1 >= 240 && b1 <= 254)
          return "E"; // Experimental; used for research
      return "Unknown";
  }
  // Aidmar
  /**
   * Gets the index of the element in a vector that is closest to a reference element.
   * When several elements are equally close, the index of the first one is returned.
   * For an empty vector the result is 0.
   * @param v vector of timestamps to search
   * @param refElem the timestamp that is searched for, or whose closest neighbour is searched for
   * @return index of the closest element
   */
  int getClosestIndex(std::vector<std::chrono::microseconds> v, std::chrono::microseconds refElem)
  {
      // Absolute distance (in microsecond ticks) of a timestamp from the reference.
      const auto distanceToRef = [refElem](std::chrono::microseconds t) {
          return std::abs((t - refElem).count());
      };
      const auto closest = std::min_element(v.begin(), v.end(),
          [&distanceToRef](std::chrono::microseconds a, std::chrono::microseconds b) {
              return distanceToRef(a) < distanceToRef(b);
          });
      // std::distance yields a ptrdiff_t; the narrowing to the declared int return type is made explicit.
      return static_cast<int>(std::distance(v.begin(), closest));
  }
  88. // Aidmar
  89. /**
  90. * Calculate entropy of source and destination IPs for last time interval.
  91. * The results are written to ip_entropy_interval.csv file.
  92. * @param filePath The input (background) PCAP file path.
  93. * @param intervalStartTimestamp The timstamp where the interval starts.
  94. */
  95. void statistics::calculateLastIntervalIPsEntropy(std::string filePath, std::chrono::microseconds intervalStartTimestamp){
  96. std::vector <int> IPsSrcPktsCounts;
  97. std::vector <int> IPsDstPktsCounts;
  98. std::vector <float> IPsSrcProb;
  99. std::vector <float> IPsDstProb;
  100. int pktsSent = 0, pktsReceived = 0;
  101. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  102. // TO-DO: should add this condition to avoid Segmentation Fault if(i->second.pktsSentTimestamp.size()>0)
  103. int indexStartSent = getClosestIndex(i->second.pktsSentTimestamp, intervalStartTimestamp);
  104. int IPsSrcPktsCount = i->second.pktsSentTimestamp.size() - indexStartSent;
  105. IPsSrcPktsCounts.push_back(IPsSrcPktsCount);
  106. pktsSent += IPsSrcPktsCount;
  107. //std::cout<<"IP:"<<i->first<<", indexStartSent:"<<indexStartSent<<", value:"<<i->second.pktsSentTimestamp[indexStartSent].count()<<", IPsSrcPktsCount:"<<IPsSrcPktsCount<<", total_pktsSent:"<<pktsSent<<"\n";
  108. int indexStartReceived = getClosestIndex(i->second.pktsReceivedTimestamp, intervalStartTimestamp);
  109. int IPsDstPktsCount = i->second.pktsReceivedTimestamp.size() - indexStartReceived;
  110. IPsDstPktsCounts.push_back(IPsDstPktsCount);
  111. pktsReceived += IPsDstPktsCount;
  112. }
  113. for (auto i = IPsSrcPktsCounts.begin(); i != IPsSrcPktsCounts.end(); i++) {
  114. IPsSrcProb.push_back((float)*i/pktsSent);
  115. //std::cout<<"IpSrcProb:"<<(float)*i/pktsSent<<"\n";
  116. }
  117. for (auto i = IPsDstPktsCounts.begin(); i != IPsDstPktsCounts.end(); i++) {
  118. IPsDstProb.push_back((float)*i/pktsReceived);
  119. //std::cout<<"IpDstProb:"<<(float)*i/pktsReceived<<"\n";
  120. }
  121. // Calculate IP source entropy
  122. float IPsSrcEntropy = 0;
  123. for(unsigned i=0; i < IPsSrcProb.size();i++){
  124. if (IPsSrcProb[i] > 0)
  125. IPsSrcEntropy += - IPsSrcProb[i]*log2(IPsSrcProb[i]);
  126. }
  127. // Calculate IP destination entropy
  128. float IPsDstEntropy = 0;
  129. for(unsigned i=0; i < IPsDstProb.size();i++){
  130. if (IPsDstProb[i] > 0)
  131. IPsDstEntropy += - IPsDstProb[i]*log2(IPsDstProb[i]);
  132. }
  133. // Replace pcap filename with 'filename_ip_entropy'
  134. std::string new_filepath = filePath;
  135. const std::string &newExt = "_ip_entropy_interval.csv";
  136. std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
  137. if (h != std::string::npos) {
  138. new_filepath.replace(h, newExt.length(), newExt);
  139. } else {
  140. new_filepath.append(newExt);
  141. }
  142. // Write stats to file
  143. std::ofstream file;
  144. file.open (new_filepath,std::ios_base::app);
  145. file << intervalStartTimestamp.count() << "," << IPsSrcEntropy << "," << IPsDstEntropy << "\n";
  146. file.close();
  147. }
  148. // Aidmar
  149. /**
  150. * Calculate sending packet rate for each IP in last time interval.
  151. * @param intervalStartTimestamp The timstamp where the interval starts.
  152. */
  153. void statistics::calculateLastIntervalPacketRate(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp){
  154. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  155. int indexStartSent = getClosestIndex(i->second.pktsSentTimestamp, intervalStartTimestamp);
  156. std::cout<<i->first<<", PktsSent:"<<i->second.pktsSentTimestamp.size()<<",indexStart:"<<indexStartSent<<"\n";
  157. int IPsSrcPktsCount = i->second.pktsSentTimestamp.size() - indexStartSent;
  158. std::cout<<"IPsSrcPktsCount: "<<IPsSrcPktsCount<<", Interval: "<< interval.count() <<"\n";
  159. float interval_pkt_rate = (float) IPsSrcPktsCount * 1000000 / interval.count(); // used 10^6 because interval in microseconds
  160. std::cout<<"interval_pkt_rate:"<<interval_pkt_rate<<"\n";
  161. i->second.interval_pkt_rate.push_back(0);//interval_pkt_rate);
  162. if(interval_pkt_rate > i->second.max_pkt_rate || i->second.max_pkt_rate == 0)
  163. i->second.max_pkt_rate = interval_pkt_rate;
  164. if(interval_pkt_rate < i->second.min_pkt_rate || i->second.min_pkt_rate == 0)
  165. i->second.min_pkt_rate = interval_pkt_rate;
  166. }
  167. }
  168. void statistics::addIntervalStat(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp, int previousPacketCount){
  169. //std::string filePath = "";
  170. //calculateLastIntervalIPsEntropy(filePath, intervalStartTimestamp);
  171. //calculateLastIntervalPacketRate(interval, intervalStartTimestamp);
  172. std::string interval_start_str = std::to_string(intervalStartTimestamp.count());
  173. interval_statistics[interval_start_str].pkts_count = packetCount - previousPacketCount;
  174. }
  // Aidmar - incomplete
  /**
   * Calculate entropy for time intervals. After statistics collection has finished, this method goes through
   * all stored timestamps and calculates the entropy of the IP sources and destinations.
   * Big time overhead! It is better to calculate it on the fly, while the packets are being processed.
   * @param interval The length of one time interval.
   */
  /*
  void statistics::calculateIntervalIPsEntropy(std::chrono::microseconds interval){
  std::vector <std::string> IPsSrc;
  std::vector <std::string> IPsDst;
  std::vector <int> pkts_sent;
  std::vector <int> pkts_received;
  std::vector <float> IPsSrcProb;
  std::vector <float> IPsDstProb;
  time_t t = (timestamp_lastPacket.seconds() - timestamp_firstPacket.seconds());
  time_t ms = (timestamp_lastPacket.microseconds() - timestamp_firstPacket.microseconds());
  intervalNum = t/interval;
  for(int j=0;j<intervalNum;j++){
  intStart = j*interval;
  intEnd = intStart + interval;
  for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  for(int x = 0; x<i->second.pktsSentTimestamp.size();x++){ // could have a prob loop on pktsSent, and inside we have pktsReceived..
  if(i->second.pktsSentTimestamp[x]>intStart && i->second.pktsSentTimestamp[x]<intEnd){
  IPsSrc.push_back(i->first);
  }
  if(i->second.pktsReceivedTimestamp[x]>intStart && i->second.pktsReceivedTimestamp[x]<intEnd){
  IPsDst.push_back(i->first);
  }
  }
  }
  //IPsSrcProb.push_back((float)i->second.pkts_sent/packetCount);
  //IPsDstProb.push_back((float)i->second.pkts_received/packetCount);
  }
  }*/
  210. // Aidmar
  211. /**
  212. * Calculate cumulative entropy of source and destination IPs; the entropy for packets from the beginning of the pcap file.
  213. * The results are written to filePath_ip_entropy.csv file.
  214. * @param filePath The input (background) PCAP file path.
  215. */
  216. void statistics::addIPEntropy(std::string filePath){
  217. std::vector <std::string> IPs;
  218. std::vector <float> IPsSrcProb;
  219. std::vector <float> IPsDstProb;
  220. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  221. IPs.push_back(i->first);
  222. IPsSrcProb.push_back((float)i->second.pkts_sent/packetCount);
  223. IPsDstProb.push_back((float)i->second.pkts_received/packetCount);
  224. /*std::cout << i->first << ":" << i->second.pkts_sent << ":" << i->second.pkts_received << ":"
  225. << i->second.firstAppearAsSenderPktCount << ":" << i->second.firstAppearAsReceiverPktCount << ":"
  226. << packetCount << "\n";*/
  227. }
  228. // Calculate IP source entropy
  229. float IPsSrcEntropy = 0;
  230. for(unsigned i=0; i < IPsSrcProb.size();i++){
  231. if (IPsSrcProb[i] > 0)
  232. IPsSrcEntropy += - IPsSrcProb[i]*log2(IPsSrcProb[i]);
  233. }
  234. std::cout << packetCount << ": SrcEnt: " << IPsSrcEntropy << "\n";
  235. // Calculate IP destination entropy
  236. float IPsDstEntropy = 0;
  237. for(unsigned i=0; i < IPsDstProb.size();i++){
  238. if (IPsDstProb[i] > 0)
  239. IPsDstEntropy += - IPsDstProb[i]*log2(IPsDstProb[i]);
  240. }
  241. std::cout << packetCount << ": DstEnt: " << IPsDstEntropy << "\n";
  242. // Write stats to file
  243. std::ofstream file;
  244. // Replace pcap filename with 'filename_ip_entropy'
  245. std::string new_filepath = filePath;
  246. const std::string &newExt = "_ip_entropy.csv";
  247. std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
  248. if (h != std::string::npos) {
  249. new_filepath.replace(h, newExt.length(), newExt);
  250. } else {
  251. new_filepath.append(newExt);
  252. }
  253. file.open (new_filepath,std::ios_base::app);
  254. file << packetCount << "," << IPsSrcEntropy << "," << IPsDstEntropy << "\n";
  255. file.close();
  256. }
  257. // Aidmar
  258. /**
  259. * Increments the packet counter for the given conversation.
  260. * @param ipAddressSender The sender IP address.
  261. * @param sport The source port.
  262. * @param ipAddressReceiver The receiver IP address.
  263. * @param dport The destination port.
  264. * @param timestamp The timestamp of the packet.
  265. */
  266. void statistics::addFlowStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp){
  267. flow f1 = {ipAddressReceiver, dport, ipAddressSender, sport};
  268. flow f2 = {ipAddressSender, sport, ipAddressReceiver, dport};
  269. // if already exist A(ipAddressReceiver, dport), B(ipAddressSender, sport)
  270. if (flow_statistics.count(f1)>0){
  271. flow_statistics[f1].pkts_B_A++;
  272. flow_statistics[f1].pkts_B_A_timestamp.push_back(timestamp);
  273. if(flow_statistics[f1].pkts_A_B_timestamp.size()>0){
  274. flow_statistics[f1].pkts_delay.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - flow_statistics[f1].pkts_A_B_timestamp[flow_statistics[f1].pkts_A_B_timestamp.size()-1])); // TO-DO: use .back()
  275. }
  276. //std::cout<<timestamp.count()<<"::"<<ipAddressReceiver<<":"<<dport<<","<<ipAddressSender<<":"<<sport<<"\n";
  277. //std::cout<<flow_statistics[f1].pkts_A_B<<"\n";
  278. //std::cout<<flow_statistics[f1].pkts_B_A<<"\n";
  279. }
  280. else{
  281. flow_statistics[f2].pkts_A_B++;
  282. flow_statistics[f2].pkts_A_B_timestamp.push_back(timestamp);
  283. if(flow_statistics[f2].pkts_B_A_timestamp.size()>0){
  284. flow_statistics[f2].pkts_delay.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - flow_statistics[f2].pkts_B_A_timestamp[flow_statistics[f2].pkts_B_A_timestamp.size()-1])); // TO-DO: use .back()
  285. }
  286. //std::cout<<timestamp.count()<<"::"<<ipAddressSender<<":"<<sport<<","<<ipAddressReceiver<<":"<<dport<<"\n";
  287. //std::cout<<flow_statistics[f2].pkts_A_B<<"\n";
  288. //std::cout<<flow_statistics[f2].pkts_B_A<<"\n";
  289. }
  290. }
  291. // Aidmar
  292. /**
  293. * Increments the packet counter for the given IP address and MSS value.
  294. * @param ipAddress The IP address whose MSS packet counter should be incremented.
  295. * @param mssValue The MSS value of the packet.
  296. */
  297. void statistics::incrementMSScount(std::string ipAddress, int mssValue) {
  298. mss_distribution[{ipAddress, mssValue}]++;
  299. }
  300. // Aidmar
  301. /**
  302. * Increments the packet counter for the given IP address and window size.
  303. * @param ipAddress The IP address whose window size packet counter should be incremented.
  304. * @param winSize The window size of the packet.
  305. */
  306. void statistics::incrementWinCount(std::string ipAddress, int winSize) {
  307. win_distribution[{ipAddress, winSize}]++;
  308. }
  309. /**
  310. * Increments the packet counter for the given IP address and TTL value.
  311. * @param ipAddress The IP address whose TTL packet counter should be incremented.
  312. * @param ttlValue The TTL value of the packet.
  313. */
  314. void statistics::incrementTTLcount(std::string ipAddress, int ttlValue) {
  315. ttl_distribution[{ipAddress, ttlValue}]++;
  316. }
  317. /**
  318. * Increments the protocol counter for the given IP address and protocol.
  319. * @param ipAddress The IP address whose protocol packet counter should be incremented.
  320. * @param protocol The protocol of the packet.
  321. */
  322. void statistics::incrementProtocolCount(std::string ipAddress, std::string protocol) {
  323. protocol_distribution[{ipAddress, protocol}]++;
  324. }
  325. /**
  326. * Returns the number of packets seen for the given IP address and protocol.
  327. * @param ipAddress The IP address whose packet count is wanted.
  328. * @param protocol The protocol whose packet count is wanted.
  329. * @return an integer: the number of packets
  330. */
  331. int statistics::getProtocolCount(std::string ipAddress, std::string protocol) {
  332. return protocol_distribution[{ipAddress, protocol}];
  333. }
  334. /**
  335. * Increments the packet counter for
  336. * - the given sender IP address with outgoing port and
  337. * - the given receiver IP address with incoming port.
  338. * @param ipAddressSender The IP address of the packet sender.
  339. * @param outgoingPort The port used by the sender.
  340. * @param ipAddressReceiver The IP address of the packet receiver.
  341. * @param incomingPort The port used by the receiver.
  342. */
  343. void statistics::incrementPortCount(std::string ipAddressSender, int outgoingPort, std::string ipAddressReceiver,
  344. int incomingPort) {
  345. ip_ports[{ipAddressSender, "out", outgoingPort}]++;
  346. ip_ports[{ipAddressReceiver, "in", incomingPort}]++;
  347. }
  348. /**
  349. * Creates a new statistics object.
  350. */
  351. statistics::statistics(void) {
  352. }
  353. /**
  354. * Stores the assignment IP address -> MAC address.
  355. * @param ipAddress The IP address belonging to the given MAC address.
  356. * @param macAddress The MAC address belonging to the given IP address.
  357. */
  358. void statistics::assignMacAddress(std::string ipAddress, std::string macAddress) {
  359. ip_mac_mapping[ipAddress] = macAddress;
  360. }
  361. /**
  362. * Registers statistical data for a sent packet. Increments the counter packets_sent for the sender and
  363. * packets_received for the receiver. Adds the bytes as kbytes_sent (sender) and kybtes_received (receiver).
  364. * @param ipAddressSender The IP address of the packet sender.
  365. * @param ipAddressReceiver The IP address of the packet receiver.
  366. * @param bytesSent The packet's size.
  367. */
  368. void statistics::addIpStat_packetSent(std::string filePath, std::string ipAddressSender, std::string ipAddressReceiver, long bytesSent, std::chrono::microseconds timestamp) {
  369. // Aidmar - Adding IP as a sender for first time
  370. if(ip_statistics[ipAddressSender].pkts_sent==0){
  371. // Add the IP class
  372. ip_statistics[ipAddressSender].ip_class = getIPv4Class(ipAddressSender);
  373. // Initialize packet rates
  374. ip_statistics[ipAddressSender].max_pkt_rate = 0;
  375. ip_statistics[ipAddressSender].min_pkt_rate = 0;
  376. // Caculate Mahoney anomaly score for ip.src
  377. float ipSrc_Mahoney_score = 0;
  378. // s_r: The number of IP sources (the different values)
  379. // n: The number of the total instances
  380. // s_t: The "time" since last anomalous (novel) IP was appeared
  381. int s_t = 0, n = 0, s_r = 0;
  382. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  383. if (i->second.pkts_sent > 0)
  384. s_r++;
  385. }
  386. if(s_r > 0){
  387. // The number of the total instances
  388. n = packetCount;
  389. // The packet count when the last novel IP was added as a sender
  390. int pktCntNvlSndr = 0;
  391. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  392. if (pktCntNvlSndr < i->second.firstAppearAsSenderPktCount)
  393. pktCntNvlSndr = i->second.firstAppearAsSenderPktCount;
  394. }
  395. // The "time" since last anomalous (novel) IP was appeared
  396. s_t = packetCount - pktCntNvlSndr + 1;
  397. ipSrc_Mahoney_score = (float)s_t*n/s_r;
  398. }
  399. // Replace pcap filename with 'filename_ip_entropy'
  400. std::string new_filepath = filePath;
  401. const std::string &newExt = "_ip_src_anomaly_score.csv";
  402. std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
  403. if (h != std::string::npos) {
  404. new_filepath.replace(h, newExt.length(), newExt);
  405. } else {
  406. new_filepath.append(newExt);
  407. }
  408. // Write stats to file
  409. std::ofstream file;
  410. file.open (new_filepath,std::ios_base::app);
  411. file << ipAddressSender << ","<< s_t << "," << n << "," << s_r << "," << ipSrc_Mahoney_score << "\n";
  412. file.close();
  413. ip_statistics[ipAddressSender].firstAppearAsSenderPktCount = packetCount;
  414. ip_statistics[ipAddressSender].sourceAnomalyScore = ipSrc_Mahoney_score;
  415. }
  416. // Aidmar - Adding IP as a receiver for first time
  417. if(ip_statistics[ipAddressReceiver].pkts_received==0){
  418. // Add the IP class
  419. ip_statistics[ipAddressReceiver].ip_class = getIPv4Class(ipAddressReceiver);
  420. // Caculate Mahoney anomaly score for ip.dst
  421. float ipDst_Mahoney_score = 0;
  422. // s_r: The number of IP sources (the different values)
  423. // n: The number of the total instances
  424. // s_t: The "time" since last anomalous (novel) IP was appeared
  425. int s_t = 0, n = 0, s_r = 0;
  426. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  427. if (i->second.pkts_received > 0)
  428. s_r++;
  429. }
  430. if(s_r > 0){
  431. // The number of the total instances
  432. n = packetCount;
  433. // The packet count when the last novel IP was added as a sender
  434. int pktCntNvlRcvr = 0;
  435. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  436. if (pktCntNvlRcvr < i->second.firstAppearAsReceiverPktCount)
  437. pktCntNvlRcvr = i->second.firstAppearAsReceiverPktCount;
  438. }
  439. // The "time" since last anomalous (novel) IP was appeared
  440. s_t = packetCount - pktCntNvlRcvr + 1;
  441. ipDst_Mahoney_score = (float)s_t*n/s_r;
  442. }
  443. // Replace pcap filename with 'filename_ip_entropy'
  444. std::string new_filepath = filePath;
  445. const std::string &newExt = "_ip_dst_anomaly_score.csv";
  446. std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
  447. if (h != std::string::npos) {
  448. new_filepath.replace(h, newExt.length(), newExt);
  449. } else {
  450. new_filepath.append(newExt);
  451. }
  452. // Write stats to file
  453. std::ofstream file;
  454. file.open (new_filepath,std::ios_base::app);
  455. file << ipAddressReceiver << ","<< s_t << "," << n << "," << s_r << "," << ipDst_Mahoney_score << "\n";
  456. file.close();
  457. ip_statistics[ipAddressReceiver].firstAppearAsReceiverPktCount = packetCount;
  458. ip_statistics[ipAddressReceiver].destinationAnomalyScore = ipDst_Mahoney_score;
  459. }
  460. // Update stats for packet sender
  461. ip_statistics[ipAddressSender].kbytes_sent += (float(bytesSent) / 1024);
  462. ip_statistics[ipAddressSender].pkts_sent++;
  463. // Aidmar
  464. ip_statistics[ipAddressSender].pktsSentTimestamp.push_back(timestamp);
  465. // Aidmar - calculate packet rate (assumption: max_pkt_rate=1/smallest time between two consecutive pkts)
  466. // resulting in very big rates, therefore it could be better to calculate pkt rate on time intervals
  467. /*if(ip_statistics[ipAddressSender].pktsSentTimestamp.size() > 0){
  468. std::chrono::microseconds temp_pkt_consecutive_time = timestamp - ip_statistics[ipAddressSender].pktsSentTimestamp.back();
  469. float temp_pkt_rate = (float) 1000000/temp_pkt_consecutive_time.count(); // pkt per sec = 10**6/micro sec
  470. if(temp_pkt_rate > ip_statistics[ipAddressSender].max_pkt_rate || ip_statistics[ipAddressSender].max_pkt_rate == 0)
  471. ip_statistics[ipAddressSender].max_pkt_rate = temp_pkt_rate;
  472. if(temp_pkt_rate < ip_statistics[ipAddressSender].min_pkt_rate || ip_statistics[ipAddressSender].min_pkt_rate == 0)
  473. ip_statistics[ipAddressSender].min_pkt_rate = temp_pkt_rate;
  474. }*/
  475. // Update stats for packet receiver
  476. ip_statistics[ipAddressReceiver].kbytes_received += (float(bytesSent) / 1024);
  477. ip_statistics[ipAddressReceiver].pkts_received++;
  478. // Aidmar
  479. ip_statistics[ipAddressReceiver].pktsReceivedTimestamp.push_back(timestamp);
  480. }
  481. /**
  482. * Registers a value of the TCP option Maximum Segment Size (MSS).
  483. * @param ipAddress The IP address which sent the TCP packet.
  484. * @param MSSvalue The MSS value found.
  485. */
  486. void statistics::addMSS(std::string ipAddress, int MSSvalue) {
  487. ip_sumMss[ipAddress] += MSSvalue;
  488. }
  489. /**
  490. * Setter for the timestamp_firstPacket field.
  491. * @param ts The timestamp of the first packet in the PCAP file.
  492. */
  493. void statistics::setTimestampFirstPacket(Tins::Timestamp ts) {
  494. timestamp_firstPacket = ts;
  495. }
  496. /**
  497. * Setter for the timestamp_lastPacket field.
  498. * @param ts The timestamp of the last packet in the PCAP file.
  499. */
  500. void statistics::setTimestampLastPacket(Tins::Timestamp ts) {
  501. timestamp_lastPacket = ts;
  502. }
  503. // Aidmar
  504. /**
  505. * Getter for the timestamp_firstPacket field.
  506. */
  507. Tins::Timestamp statistics::getTimestampFirstPacket() {
  508. return timestamp_firstPacket;
  509. }
  510. /**
  511. * Getter for the timestamp_lastPacket field.
  512. */
  513. Tins::Timestamp statistics::getTimestampLastPacket() {
  514. return timestamp_lastPacket;
  515. }
  516. /**
  517. * Getter for the packetCount field.
  518. */
  519. int statistics::getPacketCount() {
  520. return packetCount;
  521. }
  522. /**
  523. * Calculates the capture duration.
  524. * @return a formatted string HH:MM:SS.mmmmmm with
  525. * HH: hour, MM: minute, SS: second, mmmmmm: microseconds
  526. */
  527. std::string statistics::getCaptureDurationTimestamp() const {
  528. // Calculate duration
  529. time_t t = (timestamp_lastPacket.seconds() - timestamp_firstPacket.seconds());
  530. time_t ms = (timestamp_lastPacket.microseconds() - timestamp_firstPacket.microseconds());
  531. long int hour = t / 3600;
  532. long int remainder = (t - hour * 3600);
  533. long int minute = remainder / 60;
  534. long int second = (remainder - minute * 60) % 60;
  535. long int microseconds = ms;
  536. // Build desired output format: YYYY-mm-dd hh:mm:ss
  537. char out[64];
  538. sprintf(out, "%02ld:%02ld:%02ld.%06ld ", hour, minute, second, microseconds);
  539. return std::string(out);
  540. }
  541. /**
  542. * Calculates the capture duration.
  543. * @return a formatted string SS.mmmmmm with
  544. * S: seconds (UNIX time), mmmmmm: microseconds
  545. */
  546. float statistics::getCaptureDurationSeconds() const {
  547. timeval d;
  548. d.tv_sec = timestamp_lastPacket.seconds() - timestamp_firstPacket.seconds();
  549. d.tv_usec = timestamp_lastPacket.microseconds() - timestamp_firstPacket.microseconds();
  550. char tmbuf[64], buf[64];
  551. auto nowtm = localtime(&(d.tv_sec));
  552. strftime(tmbuf, sizeof(tmbuf), "%S", nowtm);
  553. snprintf(buf, sizeof(buf), "%s.%06u", tmbuf, (uint) d.tv_usec);
  554. return std::stof(std::string(buf));
  555. }
  556. /**
  557. * Creates a timestamp based on a time_t seconds (UNIX time format) and microseconds.
  558. * @param seconds
  559. * @param microseconds
  560. * @return a formatted string Y-m-d H:M:S.m with
  561. * Y: year, m: month, d: day, H: hour, M: minute, S: second, m: microseconds
  562. */
  563. std::string statistics::getFormattedTimestamp(time_t seconds, suseconds_t microseconds) const {
  564. timeval tv;
  565. tv.tv_sec = seconds;
  566. tv.tv_usec = microseconds;
  567. char tmbuf[64], buf[64];
  568. auto nowtm = localtime(&(tv.tv_sec));
  569. strftime(tmbuf, sizeof(tmbuf), "%Y-%m-%d %H:%M:%S", nowtm);
  570. snprintf(buf, sizeof(buf), "%s.%06u", tmbuf, (uint) tv.tv_usec);
  571. return std::string(buf);
  572. }
  573. /**
  574. * Calculates the statistics for a given IP address.
  575. * @param ipAddress The IP address whose statistics should be calculated.
  576. * @return a ip_stats struct containing statistical data derived by the statistical data collected.
  577. */
  578. ip_stats statistics::getStatsForIP(std::string ipAddress) {
  579. float duration = getCaptureDurationSeconds();
  580. entry_ipStat ipStatEntry = ip_statistics[ipAddress];
  581. ip_stats s;
  582. s.bandwidthKBitsIn = (ipStatEntry.kbytes_received / duration) * 8;
  583. s.bandwidthKBitsOut = (ipStatEntry.kbytes_sent / duration) * 8;
  584. s.packetPerSecondIn = (ipStatEntry.pkts_received / duration);
  585. s.packetPerSecondOut = (ipStatEntry.pkts_sent / duration);
  586. s.AvgPacketSizeSent = (ipStatEntry.kbytes_sent / ipStatEntry.pkts_sent);
  587. s.AvgPacketSizeRecv = (ipStatEntry.kbytes_received / ipStatEntry.pkts_received);
  588. int sumMSS = ip_sumMss[ipAddress];
  589. int tcpPacketsSent = getProtocolCount(ipAddress, "TCP");
  590. s.AvgMaxSegmentSizeTCP = ((sumMSS > 0 && tcpPacketsSent > 0) ? (sumMSS / tcpPacketsSent) : 0);
  591. return s;
  592. }
  593. /**
  594. * Increments the packet counter.
  595. */
  596. void statistics::incrementPacketCount() {
  597. packetCount++;
  598. }
  599. /**
  600. * Prints the statistics of the PCAP and IP specific statistics for the given IP address.
  601. * @param ipAddress The IP address whose statistics should be printed. Can be empty "" to print only general file statistics.
  602. */
  603. void statistics::printStats(std::string ipAddress) {
  604. std::stringstream ss;
  605. ss << std::endl;
  606. ss << "Capture duration: " << getCaptureDurationSeconds() << " seconds" << std::endl;
  607. ss << "Capture duration (HH:MM:SS.mmmmmm): " << getCaptureDurationTimestamp() << std::endl;
  608. ss << "#Packets: " << packetCount << std::endl;
  609. ss << std::endl;
  610. // Print IP address specific statistics only if IP address was given
  611. if (ipAddress != "") {
  612. entry_ipStat e = ip_statistics[ipAddress];
  613. ss << "\n----- STATS FOR IP ADDRESS [" << ipAddress << "] -------" << std::endl;
  614. ss << std::endl << "KBytes sent: " << e.kbytes_sent << std::endl;
  615. ss << "KBytes received: " << e.kbytes_received << std::endl;
  616. ss << "Packets sent: " << e.pkts_sent << std::endl;
  617. ss << "Packets received: " << e.pkts_received << "\n\n";
  618. ip_stats is = getStatsForIP(ipAddress);
  619. ss << "Bandwidth IN: " << is.bandwidthKBitsIn << " kbit/s" << std::endl;
  620. ss << "Bandwidth OUT: " << is.bandwidthKBitsOut << " kbit/s" << std::endl;
  621. ss << "Packets per second IN: " << is.packetPerSecondIn << std::endl;
  622. ss << "Packets per second OUT: " << is.packetPerSecondOut << std::endl;
  623. ss << "Avg Packet Size Sent: " << is.AvgPacketSizeSent << " kbytes" << std::endl;
  624. ss << "Avg Packet Size Received: " << is.AvgPacketSizeRecv << " kbytes" << std::endl;
  625. ss << "Avg MSS: " << is.AvgMaxSegmentSizeTCP << " bytes" << std::endl;
  626. }
  627. std::cout << ss.str();
  628. }
  629. /**
  630. * Derives general PCAP file statistics from the collected statistical data and
  631. * writes all data into a SQLite database, located at database_path.
  632. * @param database_path The path of the SQLite database file ending with .sqlite3.
  633. */
  634. void statistics::writeToDatabase(std::string database_path) {
  635. // Generate general file statistics
  636. float duration = getCaptureDurationSeconds();
  637. long sumPacketsSent = 0, senderCountIP = 0;
  638. float sumBandwidthIn = 0.0, sumBandwidthOut = 0.0;
  639. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  640. sumPacketsSent += i->second.pkts_sent;
  641. // Consumed bandwith (bytes) for sending packets
  642. sumBandwidthIn += (i->second.kbytes_received / duration);
  643. sumBandwidthOut += (i->second.kbytes_sent / duration);
  644. senderCountIP++;
  645. }
  646. float avgPacketRate = (packetCount / duration);
  647. long avgPacketSize = getAvgPacketSize();
  648. long avgPacketsSentPerHost = (sumPacketsSent / senderCountIP);
  649. float avgBandwidthInKBits = (sumBandwidthIn / senderCountIP) * 8;
  650. float avgBandwidthOutInKBits = (sumBandwidthOut / senderCountIP) * 8;
  651. // Create database and write information
  652. statistics_db db(database_path);
  653. db.writeStatisticsFile(packetCount, getCaptureDurationSeconds(),
  654. getFormattedTimestamp(timestamp_firstPacket.seconds(), timestamp_firstPacket.microseconds()),
  655. getFormattedTimestamp(timestamp_lastPacket.seconds(), timestamp_lastPacket.microseconds()),
  656. avgPacketRate, avgPacketSize, avgPacketsSentPerHost, avgBandwidthInKBits,
  657. avgBandwidthOutInKBits);
  658. db.writeStatisticsIP(ip_statistics);
  659. db.writeStatisticsTTL(ttl_distribution);
  660. db.writeStatisticsIpMac(ip_mac_mapping);
  661. db.writeStatisticsMss(ip_sumMss);
  662. db.writeStatisticsPorts(ip_ports);
  663. db.writeStatisticsProtocols(protocol_distribution);
  664. // Aidmar
  665. db.writeStatisticsMss_dist(mss_distribution);
  666. db.writeStatisticsWin(win_distribution);
  667. db.writeStatisticsFlow(flow_statistics);
  668. db.writeStatisticsInterval(interval_statistics);
  669. }
  670. /**
  671. * Returns the average packet size.
  672. * @return a float indicating the average packet size in kbytes.
  673. */
  674. float statistics::getAvgPacketSize() const {
  675. // AvgPktSize = (Sum of all packet sizes / #Packets)
  676. return (sumPacketSize / packetCount) / 1024;
  677. }
  678. /**
  679. * Adds the size of a packet (to be used to calculate the avg. packet size).
  680. * @param packetSize The size of the current packet in bytes.
  681. */
  682. void statistics::addPacketSize(uint32_t packetSize) {
  683. sumPacketSize += ((float) packetSize);
  684. }