statistics.cpp 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792
  1. // Aidmar
  #include <iostream>
  #include <fstream>
  #include <vector>
  #include <math.h>
  #include <algorithm>
  #include <sstream>
  #include <stdexcept>
  #include "statistics.h"
  #include <SQLiteCpp/SQLiteCpp.h>
  #include "statistics_db.h"
  11. // Aidmar
  12. // Aidmar
  /**
   * Splits a string into the substrings separated by a delimiter character.
   * Tokens are read with std::getline, so two consecutive delimiters produce an
   * empty token, while an empty input produces an empty vector.
   * @param str the string to split
   * @param delimiter the character separating the tokens
   * @return the tokens in their order of appearance
   */
  std::vector<std::string> split(std::string str, char delimiter) {
      std::istringstream input(str);
      std::vector<std::string> pieces;
      for (std::string piece; std::getline(input, piece, delimiter);) {
          pieces.push_back(piece);
      }
      return pieces;
  }
  28. // Aidmar
  /**
   * Determines the classful category (A, B, C, D, E) of an IPv4 address.
   * Only the first two dot-separated fields are inspected. Private ranges
   * (10.x, 172.16-31.x, 192.168.x) get the suffix "-private" and 127.x is
   * reported as "A-unused".
   * @param ipAddress the address in dotted notation
   * @return the class label, or "Unknown" if the text has fewer than two fields,
   *         the fields are not numeric, or the first octet is outside 1..254
   */
  std::string getIPv4Class(std::string ipAddress){
      const std::string unknown = "Unknown";

      // At least two non-trailing fields are required ("10." or "fe80::1" do not qualify).
      const std::string::size_type firstDot = ipAddress.find('.');
      if (firstDot == std::string::npos || firstDot + 1 >= ipAddress.size())
          return unknown;
      const std::string::size_type secondDot = ipAddress.find('.', firstDot + 1);
      const std::string firstField = ipAddress.substr(0, firstDot);
      const std::string secondField = (secondDot == std::string::npos)
                                      ? ipAddress.substr(firstDot + 1)
                                      : ipAddress.substr(firstDot + 1, secondDot - firstDot - 1);

      int b1 = 0;
      int b2 = 0;
      try {
          b1 = std::stoi(firstField);
          b2 = std::stoi(secondField);
      } catch (const std::exception &) {
          // std::stoi throws for empty, non-numeric or out-of-range fields;
          // such input is simply not a classifiable IPv4 address.
          return unknown;
      }

      if (b1 >= 1 && b1 <= 126)
          return (b1 == 10) ? "A-private" : "A";
      if (b1 == 127)
          return "A-unused"; // reserved for loopback and diagnostic functions
      if (b1 >= 128 && b1 <= 191)
          return (b1 == 172 && b2 >= 16 && b2 <= 31) ? "B-private" : "B";
      if (b1 >= 192 && b1 <= 223)
          return (b1 == 192 && b2 == 168) ? "C-private" : "C";
      if (b1 >= 224 && b1 <= 239)
          return "D"; // reserved for multicasting
      if (b1 >= 240 && b1 <= 254)
          return "E"; // experimental; used for research

      // Alternative noted by the original author: libtins offers IPv4Range, e.g.
      // (IPv4Address("192.168.1.0") / 24).contains("192.168.1.250").
      return unknown;
  }
  74. // Aidmar
  /**
   * Finds the position of the timestamp that lies closest to a reference value.
   * When several timestamps are equally close the first one wins; an empty
   * vector yields index 0.
   * @param v the timestamps to search
   * @param refElem the reference timestamp
   * @return index of the element with the smallest absolute distance to refElem
   */
  int getClosestIndex(std::vector<std::chrono::microseconds> v, std::chrono::microseconds refElem)
  {
      using Index = std::vector<std::chrono::microseconds>::size_type;
      Index closest = 0;
      for (Index candidate = 1; candidate < v.size(); ++candidate) {
          const auto candidateGap = std::abs((v[candidate] - refElem).count());
          const auto closestGap = std::abs((v[closest] - refElem).count());
          // Strict comparison keeps the earliest element on ties.
          if (candidateGap < closestGap)
              closest = candidate;
      }
      return static_cast<int>(closest);
  }
  88. // Aidmar
  89. /**
  90. * Calculate entropy of source and destination IPs for last time interval.
  91. * The results are written to ip_entropy_interval.csv file.
  92. * @param filePath The input (background) PCAP file path.
  93. * @param intervalStartTimestamp The timstamp where the interval starts.
  94. */
  95. void statistics::calculateLastIntervalIPsEntropy(std::string filePath, std::chrono::microseconds intervalStartTimestamp){
  96. std::vector <int> IPsSrcPktsCounts;
  97. std::vector <int> IPsDstPktsCounts;
  98. std::vector <float> IPsSrcProb;
  99. std::vector <float> IPsDstProb;
  100. int pktsSent = 0, pktsReceived = 0;
  101. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  102. // TO-DO: should add this condition to avoid Segmentation Fault if(i->second.pktsSentTimestamp.size()>0)
  103. int indexStartSent = getClosestIndex(i->second.pktsSentTimestamp, intervalStartTimestamp);
  104. int IPsSrcPktsCount = i->second.pktsSentTimestamp.size() - indexStartSent;
  105. IPsSrcPktsCounts.push_back(IPsSrcPktsCount);
  106. pktsSent += IPsSrcPktsCount;
  107. //std::cout<<"IP:"<<i->first<<", indexStartSent:"<<indexStartSent<<", value:"<<i->second.pktsSentTimestamp[indexStartSent].count()<<", IPsSrcPktsCount:"<<IPsSrcPktsCount<<", total_pktsSent:"<<pktsSent<<"\n";
  108. int indexStartReceived = getClosestIndex(i->second.pktsReceivedTimestamp, intervalStartTimestamp);
  109. int IPsDstPktsCount = i->second.pktsReceivedTimestamp.size() - indexStartReceived;
  110. IPsDstPktsCounts.push_back(IPsDstPktsCount);
  111. pktsReceived += IPsDstPktsCount;
  112. }
  113. for (auto i = IPsSrcPktsCounts.begin(); i != IPsSrcPktsCounts.end(); i++) {
  114. IPsSrcProb.push_back((float)*i/pktsSent);
  115. //std::cout<<"IpSrcProb:"<<(float)*i/pktsSent<<"\n";
  116. }
  117. for (auto i = IPsDstPktsCounts.begin(); i != IPsDstPktsCounts.end(); i++) {
  118. IPsDstProb.push_back((float)*i/pktsReceived);
  119. //std::cout<<"IpDstProb:"<<(float)*i/pktsReceived<<"\n";
  120. }
  121. // Calculate IP source entropy
  122. float IPsSrcEntropy = 0;
  123. for(unsigned i=0; i < IPsSrcProb.size();i++){
  124. if (IPsSrcProb[i] > 0)
  125. IPsSrcEntropy += - IPsSrcProb[i]*log2(IPsSrcProb[i]);
  126. }
  127. // Calculate IP destination entropy
  128. float IPsDstEntropy = 0;
  129. for(unsigned i=0; i < IPsDstProb.size();i++){
  130. if (IPsDstProb[i] > 0)
  131. IPsDstEntropy += - IPsDstProb[i]*log2(IPsDstProb[i]);
  132. }
  133. // Replace pcap filename with 'filename_ip_entropy'
  134. std::string new_filepath = filePath;
  135. const std::string &newExt = "_ip_entropy_interval.csv";
  136. std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
  137. if (h != std::string::npos) {
  138. new_filepath.replace(h, newExt.length(), newExt);
  139. } else {
  140. new_filepath.append(newExt);
  141. }
  142. // Write stats to file
  143. std::ofstream file;
  144. file.open (new_filepath,std::ios_base::app);
  145. file << intervalStartTimestamp.count() << "," << IPsSrcEntropy << "," << IPsDstEntropy << "\n";
  146. file.close();
  147. }
  148. // Aidmar
  149. /**
  150. * Calculate sending packet rate for each IP in last time interval.
  151. * @param intervalStartTimestamp The timstamp where the interval starts.
  152. */
  153. void statistics::calculateLastIntervalPacketRate(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp){
  154. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  155. int indexStartSent = getClosestIndex(i->second.pktsSentTimestamp, intervalStartTimestamp);
  156. std::cout<<i->first<<", PktsSent:"<<i->second.pktsSentTimestamp.size()<<",indexStart:"<<indexStartSent<<"\n";
  157. int IPsSrcPktsCount = i->second.pktsSentTimestamp.size() - indexStartSent;
  158. std::cout<<"IPsSrcPktsCount: "<<IPsSrcPktsCount<<", Interval: "<< interval.count() <<"\n";
  159. float interval_pkt_rate = (float) IPsSrcPktsCount * 1000000 / interval.count(); // used 10^6 because interval in microseconds
  160. std::cout<<"interval_pkt_rate:"<<interval_pkt_rate<<"\n";
  161. i->second.interval_pkt_rate.push_back(0);//interval_pkt_rate);
  162. if(interval_pkt_rate > i->second.max_pkt_rate || i->second.max_pkt_rate == 0)
  163. i->second.max_pkt_rate = interval_pkt_rate;
  164. if(interval_pkt_rate < i->second.min_pkt_rate || i->second.min_pkt_rate == 0)
  165. i->second.min_pkt_rate = interval_pkt_rate;
  166. }
  167. }
  168. // Aidmar - incomplete
  169. /**
  170. * Calculate entropy for time intervals. After finishing statistics collecting, this method goes through
  171. * all stored timestamps and calculate entropy of IP source and destination.
  172. * Big time overhead!! better to calculate it on fly, while we are processing packets.
  173. * @param
  174. */
  175. /*
  176. void statistics::calculateIntervalIPsEntropy(std::chrono::microseconds interval){
  177. std::vector <std::string> IPsSrc;
  178. std::vector <std::string> IPsDst;
  179. std::vector <int> pkts_sent;
  180. std::vector <int> pkts_received;
  181. std::vector <float> IPsSrcProb;
  182. std::vector <float> IPsDstProb;
  183. time_t t = (timestamp_lastPacket.seconds() - timestamp_firstPacket.seconds());
  184. time_t ms = (timestamp_lastPacket.microseconds() - timestamp_firstPacket.microseconds());
  185. intervalNum = t/interval;
  186. for(int j=0;j<intervalNum;j++){
  187. intStart = j*interval;
  188. intEnd = intStart + interval;
  189. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  190. for(int x = 0; x<i->second.pktsSentTimestamp.size();x++){ // could have a prob loop on pktsSent, and inside we have pktsReceived..
  191. if(i->second.pktsSentTimestamp[x]>intStart && i->second.pktsSentTimestamp[x]<intEnd){
  192. IPsSrc.push_back(i->first);
  193. }
  194. if(i->second.pktsReceivedTimestamp[x]>intStart && i->second.pktsReceivedTimestamp[x]<intEnd){
  195. IPsDst.push_back(i->first);
  196. }
  197. }
  198. }
  199. //IPsSrcProb.push_back((float)i->second.pkts_sent/packetCount);
  200. //IPsDstProb.push_back((float)i->second.pkts_received/packetCount);
  201. }
  202. }*/
  203. // Aidmar
  204. /**
  205. * Calculate cumulative entropy of source and destination IPs; the entropy for packets from the beginning of the pcap file.
  206. * The results are written to filePath_ip_entropy.csv file.
  207. * @param filePath The input (background) PCAP file path.
  208. */
  209. void statistics::addIPEntropy(std::string filePath){
  210. std::vector <std::string> IPs;
  211. std::vector <float> IPsSrcProb;
  212. std::vector <float> IPsDstProb;
  213. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  214. IPs.push_back(i->first);
  215. IPsSrcProb.push_back((float)i->second.pkts_sent/packetCount);
  216. IPsDstProb.push_back((float)i->second.pkts_received/packetCount);
  217. /*std::cout << i->first << ":" << i->second.pkts_sent << ":" << i->second.pkts_received << ":"
  218. << i->second.firstAppearAsSenderPktCount << ":" << i->second.firstAppearAsReceiverPktCount << ":"
  219. << packetCount << "\n";*/
  220. }
  221. // Calculate IP source entropy
  222. float IPsSrcEntropy = 0;
  223. for(unsigned i=0; i < IPsSrcProb.size();i++){
  224. if (IPsSrcProb[i] > 0)
  225. IPsSrcEntropy += - IPsSrcProb[i]*log2(IPsSrcProb[i]);
  226. }
  227. std::cout << packetCount << ": SrcEnt: " << IPsSrcEntropy << "\n";
  228. // Calculate IP destination entropy
  229. float IPsDstEntropy = 0;
  230. for(unsigned i=0; i < IPsDstProb.size();i++){
  231. if (IPsDstProb[i] > 0)
  232. IPsDstEntropy += - IPsDstProb[i]*log2(IPsDstProb[i]);
  233. }
  234. std::cout << packetCount << ": DstEnt: " << IPsDstEntropy << "\n";
  235. // Write stats to file
  236. std::ofstream file;
  237. // Replace pcap filename with 'filename_ip_entropy'
  238. std::string new_filepath = filePath;
  239. const std::string &newExt = "_ip_entropy.csv";
  240. std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
  241. if (h != std::string::npos) {
  242. new_filepath.replace(h, newExt.length(), newExt);
  243. } else {
  244. new_filepath.append(newExt);
  245. }
  246. file.open (new_filepath,std::ios_base::app);
  247. file << packetCount << "," << IPsSrcEntropy << "," << IPsDstEntropy << "\n";
  248. file.close();
  249. }
  250. // Aidmar
  251. /**
  252. * Increments the packet counter for the given conversation.
  253. * @param ipAddressSender The sender IP address.
  254. * @param sport The source port.
  255. * @param ipAddressReceiver The receiver IP address.
  256. * @param dport The destination port.
  257. * @param timestamp The timestamp of the packet.
  258. */
  259. void statistics::addFlowStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp){
  260. flow f1 = {ipAddressReceiver, dport, ipAddressSender, sport};
  261. flow f2 = {ipAddressSender, sport, ipAddressReceiver, dport};
  262. // if already exist A(ipAddressReceiver, dport), B(ipAddressSender, sport)
  263. if (flow_statistics.count(f1)>0){
  264. flow_statistics[f1].pkts_B_A++;
  265. flow_statistics[f1].pkts_B_A_timestamp.push_back(timestamp);
  266. if(flow_statistics[f1].pkts_A_B_timestamp.size()>0){
  267. flow_statistics[f1].pkts_delay.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - flow_statistics[f1].pkts_A_B_timestamp[flow_statistics[f1].pkts_A_B_timestamp.size()-1])); // TO-DO: use .back()
  268. }
  269. //std::cout<<timestamp.count()<<"::"<<ipAddressReceiver<<":"<<dport<<","<<ipAddressSender<<":"<<sport<<"\n";
  270. //std::cout<<flow_statistics[f1].pkts_A_B<<"\n";
  271. //std::cout<<flow_statistics[f1].pkts_B_A<<"\n";
  272. }
  273. else{
  274. flow_statistics[f2].pkts_A_B++;
  275. flow_statistics[f2].pkts_A_B_timestamp.push_back(timestamp);
  276. if(flow_statistics[f2].pkts_B_A_timestamp.size()>0){
  277. flow_statistics[f2].pkts_delay.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - flow_statistics[f2].pkts_B_A_timestamp[flow_statistics[f2].pkts_B_A_timestamp.size()-1])); // TO-DO: use .back()
  278. }
  279. //std::cout<<timestamp.count()<<"::"<<ipAddressSender<<":"<<sport<<","<<ipAddressReceiver<<":"<<dport<<"\n";
  280. //std::cout<<flow_statistics[f2].pkts_A_B<<"\n";
  281. //std::cout<<flow_statistics[f2].pkts_B_A<<"\n";
  282. }
  283. }
  284. // Aidmar
  285. /**
  286. * Increments the packet counter for the given IP address and MSS value.
  287. * @param ipAddress The IP address whose MSS packet counter should be incremented.
  288. * @param mssValue The MSS value of the packet.
  289. */
  290. void statistics::incrementMSScount(std::string ipAddress, int mssValue) {
  291. mss_distribution[{ipAddress, mssValue}]++;
  292. }
  293. // Aidmar
  294. /**
  295. * Increments the packet counter for the given IP address and window size.
  296. * @param ipAddress The IP address whose window size packet counter should be incremented.
  297. * @param winSize The window size of the packet.
  298. */
  299. void statistics::incrementWinCount(std::string ipAddress, int winSize) {
  300. win_distribution[{ipAddress, winSize}]++;
  301. }
  302. /**
  303. * Increments the packet counter for the given IP address and TTL value.
  304. * @param ipAddress The IP address whose TTL packet counter should be incremented.
  305. * @param ttlValue The TTL value of the packet.
  306. */
  307. void statistics::incrementTTLcount(std::string ipAddress, int ttlValue) {
  308. ttl_distribution[{ipAddress, ttlValue}]++;
  309. }
  310. /**
  311. * Increments the protocol counter for the given IP address and protocol.
  312. * @param ipAddress The IP address whose protocol packet counter should be incremented.
  313. * @param protocol The protocol of the packet.
  314. */
  315. void statistics::incrementProtocolCount(std::string ipAddress, std::string protocol) {
  316. protocol_distribution[{ipAddress, protocol}]++;
  317. }
  318. /**
  319. * Returns the number of packets seen for the given IP address and protocol.
  320. * @param ipAddress The IP address whose packet count is wanted.
  321. * @param protocol The protocol whose packet count is wanted.
  322. * @return an integer: the number of packets
  323. */
  324. int statistics::getProtocolCount(std::string ipAddress, std::string protocol) {
  325. return protocol_distribution[{ipAddress, protocol}];
  326. }
  327. /**
  328. * Increments the packet counter for
  329. * - the given sender IP address with outgoing port and
  330. * - the given receiver IP address with incoming port.
  331. * @param ipAddressSender The IP address of the packet sender.
  332. * @param outgoingPort The port used by the sender.
  333. * @param ipAddressReceiver The IP address of the packet receiver.
  334. * @param incomingPort The port used by the receiver.
  335. */
  336. void statistics::incrementPortCount(std::string ipAddressSender, int outgoingPort, std::string ipAddressReceiver,
  337. int incomingPort) {
  338. ip_ports[{ipAddressSender, "out", outgoingPort}]++;
  339. ip_ports[{ipAddressReceiver, "in", incomingPort}]++;
  340. }
  341. /**
  342. * Creates a new statistics object.
  343. */
  344. statistics::statistics(void) {
  345. }
  346. /**
  347. * Stores the assignment IP address -> MAC address.
  348. * @param ipAddress The IP address belonging to the given MAC address.
  349. * @param macAddress The MAC address belonging to the given IP address.
  350. */
  351. void statistics::assignMacAddress(std::string ipAddress, std::string macAddress) {
  352. ip_mac_mapping[ipAddress] = macAddress;
  353. }
  354. /**
  355. * Registers statistical data for a sent packet. Increments the counter packets_sent for the sender and
  356. * packets_received for the receiver. Adds the bytes as kbytes_sent (sender) and kybtes_received (receiver).
  357. * @param ipAddressSender The IP address of the packet sender.
  358. * @param ipAddressReceiver The IP address of the packet receiver.
  359. * @param bytesSent The packet's size.
  360. */
  361. void statistics::addIpStat_packetSent(std::string filePath, std::string ipAddressSender, std::string ipAddressReceiver, long bytesSent, std::chrono::microseconds timestamp) {
  362. // Aidmar - Adding IP as a sender for first time
  363. if(ip_statistics[ipAddressSender].pkts_sent==0){
  364. // Add the IP class
  365. ip_statistics[ipAddressSender].ip_class = getIPv4Class(ipAddressSender);
  366. // Initialize packet rates
  367. ip_statistics[ipAddressSender].max_pkt_rate = 0;
  368. ip_statistics[ipAddressSender].min_pkt_rate = 0;
  369. // Caculate Mahoney anomaly score for ip.src
  370. float ipSrc_Mahoney_score = 0;
  371. // s_r: The number of IP sources (the different values)
  372. // n: The number of the total instances
  373. // s_t: The "time" since last anomalous (novel) IP was appeared
  374. int s_t = 0, n = 0, s_r = 0;
  375. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  376. if (i->second.pkts_sent > 0)
  377. s_r++;
  378. }
  379. if(s_r > 0){
  380. // The number of the total instances
  381. n = packetCount;
  382. // The packet count when the last novel IP was added as a sender
  383. int pktCntNvlSndr = 0;
  384. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  385. if (pktCntNvlSndr < i->second.firstAppearAsSenderPktCount)
  386. pktCntNvlSndr = i->second.firstAppearAsSenderPktCount;
  387. }
  388. // The "time" since last anomalous (novel) IP was appeared
  389. s_t = packetCount - pktCntNvlSndr + 1;
  390. ipSrc_Mahoney_score = (float)s_t*n/s_r;
  391. }
  392. // Replace pcap filename with 'filename_ip_entropy'
  393. std::string new_filepath = filePath;
  394. const std::string &newExt = "_ip_src_anomaly_score.csv";
  395. std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
  396. if (h != std::string::npos) {
  397. new_filepath.replace(h, newExt.length(), newExt);
  398. } else {
  399. new_filepath.append(newExt);
  400. }
  401. // Write stats to file
  402. std::ofstream file;
  403. file.open (new_filepath,std::ios_base::app);
  404. file << ipAddressSender << ","<< s_t << "," << n << "," << s_r << "," << ipSrc_Mahoney_score << "\n";
  405. file.close();
  406. ip_statistics[ipAddressSender].firstAppearAsSenderPktCount = packetCount;
  407. ip_statistics[ipAddressSender].sourceAnomalyScore = ipSrc_Mahoney_score;
  408. }
  409. // Aidmar - Adding IP as a receiver for first time
  410. if(ip_statistics[ipAddressReceiver].pkts_received==0){
  411. // Add the IP class
  412. ip_statistics[ipAddressReceiver].ip_class = getIPv4Class(ipAddressReceiver);
  413. // Caculate Mahoney anomaly score for ip.dst
  414. float ipDst_Mahoney_score = 0;
  415. // s_r: The number of IP sources (the different values)
  416. // n: The number of the total instances
  417. // s_t: The "time" since last anomalous (novel) IP was appeared
  418. int s_t = 0, n = 0, s_r = 0;
  419. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  420. if (i->second.pkts_received > 0)
  421. s_r++;
  422. }
  423. if(s_r > 0){
  424. // The number of the total instances
  425. n = packetCount;
  426. // The packet count when the last novel IP was added as a sender
  427. int pktCntNvlRcvr = 0;
  428. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  429. if (pktCntNvlRcvr < i->second.firstAppearAsReceiverPktCount)
  430. pktCntNvlRcvr = i->second.firstAppearAsReceiverPktCount;
  431. }
  432. // The "time" since last anomalous (novel) IP was appeared
  433. s_t = packetCount - pktCntNvlRcvr + 1;
  434. ipDst_Mahoney_score = (float)s_t*n/s_r;
  435. }
  436. // Replace pcap filename with 'filename_ip_entropy'
  437. std::string new_filepath = filePath;
  438. const std::string &newExt = "_ip_dst_anomaly_score.csv";
  439. std::string::size_type h = new_filepath.rfind('.', new_filepath.length());
  440. if (h != std::string::npos) {
  441. new_filepath.replace(h, newExt.length(), newExt);
  442. } else {
  443. new_filepath.append(newExt);
  444. }
  445. // Write stats to file
  446. std::ofstream file;
  447. file.open (new_filepath,std::ios_base::app);
  448. file << ipAddressReceiver << ","<< s_t << "," << n << "," << s_r << "," << ipDst_Mahoney_score << "\n";
  449. file.close();
  450. ip_statistics[ipAddressReceiver].firstAppearAsReceiverPktCount = packetCount;
  451. ip_statistics[ipAddressReceiver].destinationAnomalyScore = ipDst_Mahoney_score;
  452. }
  453. // Update stats for packet sender
  454. ip_statistics[ipAddressSender].kbytes_sent += (float(bytesSent) / 1024);
  455. ip_statistics[ipAddressSender].pkts_sent++;
  456. // Aidmar
  457. ip_statistics[ipAddressSender].pktsSentTimestamp.push_back(timestamp);
  458. // Aidmar - calculate packet rate (assumption: max_pkt_rate=1/smallest time between two consecutive pkts)
  459. // resulting in very big rates, therefore it could be better to calculate pkt rate on time intervals
  460. /*if(ip_statistics[ipAddressSender].pktsSentTimestamp.size() > 0){
  461. std::chrono::microseconds temp_pkt_consecutive_time = timestamp - ip_statistics[ipAddressSender].pktsSentTimestamp.back();
  462. float temp_pkt_rate = (float) 1000000/temp_pkt_consecutive_time.count(); // pkt per sec = 10**6/micro sec
  463. if(temp_pkt_rate > ip_statistics[ipAddressSender].max_pkt_rate || ip_statistics[ipAddressSender].max_pkt_rate == 0)
  464. ip_statistics[ipAddressSender].max_pkt_rate = temp_pkt_rate;
  465. if(temp_pkt_rate < ip_statistics[ipAddressSender].min_pkt_rate || ip_statistics[ipAddressSender].min_pkt_rate == 0)
  466. ip_statistics[ipAddressSender].min_pkt_rate = temp_pkt_rate;
  467. }*/
  468. // Update stats for packet receiver
  469. ip_statistics[ipAddressReceiver].kbytes_received += (float(bytesSent) / 1024);
  470. ip_statistics[ipAddressReceiver].pkts_received++;
  471. // Aidmar
  472. ip_statistics[ipAddressReceiver].pktsReceivedTimestamp.push_back(timestamp);
  473. }
  474. /**
  475. * Registers a value of the TCP option Maximum Segment Size (MSS).
  476. * @param ipAddress The IP address which sent the TCP packet.
  477. * @param MSSvalue The MSS value found.
  478. */
  479. void statistics::addMSS(std::string ipAddress, int MSSvalue) {
  480. ip_sumMss[ipAddress] += MSSvalue;
  481. }
  482. /**
  483. * Setter for the timestamp_firstPacket field.
  484. * @param ts The timestamp of the first packet in the PCAP file.
  485. */
  486. void statistics::setTimestampFirstPacket(Tins::Timestamp ts) {
  487. timestamp_firstPacket = ts;
  488. }
  489. /**
  490. * Setter for the timestamp_lastPacket field.
  491. * @param ts The timestamp of the last packet in the PCAP file.
  492. */
  493. void statistics::setTimestampLastPacket(Tins::Timestamp ts) {
  494. timestamp_lastPacket = ts;
  495. }
  496. // Aidmar
  497. /**
  498. * Getter for the timestamp_firstPacket field.
  499. */
  500. Tins::Timestamp statistics::getTimestampFirstPacket() {
  501. return timestamp_firstPacket;
  502. }
  503. /**
  504. * Getter for the timestamp_lastPacket field.
  505. */
  506. Tins::Timestamp statistics::getTimestampLastPacket() {
  507. return timestamp_lastPacket;
  508. }
  509. /**
  510. * Calculates the capture duration.
  511. * @return a formatted string HH:MM:SS.mmmmmm with
  512. * HH: hour, MM: minute, SS: second, mmmmmm: microseconds
  513. */
  514. std::string statistics::getCaptureDurationTimestamp() const {
  515. // Calculate duration
  516. time_t t = (timestamp_lastPacket.seconds() - timestamp_firstPacket.seconds());
  517. time_t ms = (timestamp_lastPacket.microseconds() - timestamp_firstPacket.microseconds());
  518. long int hour = t / 3600;
  519. long int remainder = (t - hour * 3600);
  520. long int minute = remainder / 60;
  521. long int second = (remainder - minute * 60) % 60;
  522. long int microseconds = ms;
  523. // Build desired output format: YYYY-mm-dd hh:mm:ss
  524. char out[64];
  525. sprintf(out, "%02ld:%02ld:%02ld.%06ld ", hour, minute, second, microseconds);
  526. return std::string(out);
  527. }
  528. /**
  529. * Calculates the capture duration.
  530. * @return a formatted string SS.mmmmmm with
  531. * S: seconds (UNIX time), mmmmmm: microseconds
  532. */
  533. float statistics::getCaptureDurationSeconds() const {
  534. timeval d;
  535. d.tv_sec = timestamp_lastPacket.seconds() - timestamp_firstPacket.seconds();
  536. d.tv_usec = timestamp_lastPacket.microseconds() - timestamp_firstPacket.microseconds();
  537. char tmbuf[64], buf[64];
  538. auto nowtm = localtime(&(d.tv_sec));
  539. strftime(tmbuf, sizeof(tmbuf), "%S", nowtm);
  540. snprintf(buf, sizeof(buf), "%s.%06u", tmbuf, (uint) d.tv_usec);
  541. return std::stof(std::string(buf));
  542. }
  543. /**
  544. * Creates a timestamp based on a time_t seconds (UNIX time format) and microseconds.
  545. * @param seconds
  546. * @param microseconds
  547. * @return a formatted string Y-m-d H:M:S.m with
  548. * Y: year, m: month, d: day, H: hour, M: minute, S: second, m: microseconds
  549. */
  550. std::string statistics::getFormattedTimestamp(time_t seconds, suseconds_t microseconds) const {
  551. timeval tv;
  552. tv.tv_sec = seconds;
  553. tv.tv_usec = microseconds;
  554. char tmbuf[64], buf[64];
  555. auto nowtm = localtime(&(tv.tv_sec));
  556. strftime(tmbuf, sizeof(tmbuf), "%Y-%m-%d %H:%M:%S", nowtm);
  557. snprintf(buf, sizeof(buf), "%s.%06u", tmbuf, (uint) tv.tv_usec);
  558. return std::string(buf);
  559. }
  560. /**
  561. * Calculates the statistics for a given IP address.
  562. * @param ipAddress The IP address whose statistics should be calculated.
  563. * @return a ip_stats struct containing statistical data derived by the statistical data collected.
  564. */
  565. ip_stats statistics::getStatsForIP(std::string ipAddress) {
  566. float duration = getCaptureDurationSeconds();
  567. entry_ipStat ipStatEntry = ip_statistics[ipAddress];
  568. ip_stats s;
  569. s.bandwidthKBitsIn = (ipStatEntry.kbytes_received / duration) * 8;
  570. s.bandwidthKBitsOut = (ipStatEntry.kbytes_sent / duration) * 8;
  571. s.packetPerSecondIn = (ipStatEntry.pkts_received / duration);
  572. s.packetPerSecondOut = (ipStatEntry.pkts_sent / duration);
  573. s.AvgPacketSizeSent = (ipStatEntry.kbytes_sent / ipStatEntry.pkts_sent);
  574. s.AvgPacketSizeRecv = (ipStatEntry.kbytes_received / ipStatEntry.pkts_received);
  575. int sumMSS = ip_sumMss[ipAddress];
  576. int tcpPacketsSent = getProtocolCount(ipAddress, "TCP");
  577. s.AvgMaxSegmentSizeTCP = ((sumMSS > 0 && tcpPacketsSent > 0) ? (sumMSS / tcpPacketsSent) : 0);
  578. return s;
  579. }
  580. /**
  581. * Increments the packet counter.
  582. */
  583. void statistics::incrementPacketCount() {
  584. packetCount++;
  585. }
  586. /**
  587. * Prints the statistics of the PCAP and IP specific statistics for the given IP address.
  588. * @param ipAddress The IP address whose statistics should be printed. Can be empty "" to print only general file statistics.
  589. */
  590. void statistics::printStats(std::string ipAddress) {
  591. std::stringstream ss;
  592. ss << std::endl;
  593. ss << "Capture duration: " << getCaptureDurationSeconds() << " seconds" << std::endl;
  594. ss << "Capture duration (HH:MM:SS.mmmmmm): " << getCaptureDurationTimestamp() << std::endl;
  595. ss << "#Packets: " << packetCount << std::endl;
  596. ss << std::endl;
  597. // Print IP address specific statistics only if IP address was given
  598. if (ipAddress != "") {
  599. entry_ipStat e = ip_statistics[ipAddress];
  600. ss << "\n----- STATS FOR IP ADDRESS [" << ipAddress << "] -------" << std::endl;
  601. ss << std::endl << "KBytes sent: " << e.kbytes_sent << std::endl;
  602. ss << "KBytes received: " << e.kbytes_received << std::endl;
  603. ss << "Packets sent: " << e.pkts_sent << std::endl;
  604. ss << "Packets received: " << e.pkts_received << "\n\n";
  605. ip_stats is = getStatsForIP(ipAddress);
  606. ss << "Bandwidth IN: " << is.bandwidthKBitsIn << " kbit/s" << std::endl;
  607. ss << "Bandwidth OUT: " << is.bandwidthKBitsOut << " kbit/s" << std::endl;
  608. ss << "Packets per second IN: " << is.packetPerSecondIn << std::endl;
  609. ss << "Packets per second OUT: " << is.packetPerSecondOut << std::endl;
  610. ss << "Avg Packet Size Sent: " << is.AvgPacketSizeSent << " kbytes" << std::endl;
  611. ss << "Avg Packet Size Received: " << is.AvgPacketSizeRecv << " kbytes" << std::endl;
  612. ss << "Avg MSS: " << is.AvgMaxSegmentSizeTCP << " bytes" << std::endl;
  613. }
  614. std::cout << ss.str();
  615. }
  616. /**
  617. * Derives general PCAP file statistics from the collected statistical data and
  618. * writes all data into a SQLite database, located at database_path.
  619. * @param database_path The path of the SQLite database file ending with .sqlite3.
  620. */
  621. void statistics::writeToDatabase(std::string database_path) {
  622. // Generate general file statistics
  623. float duration = getCaptureDurationSeconds();
  624. long sumPacketsSent = 0, senderCountIP = 0;
  625. float sumBandwidthIn = 0.0, sumBandwidthOut = 0.0;
  626. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  627. sumPacketsSent += i->second.pkts_sent;
  628. // Consumed bandwith (bytes) for sending packets
  629. sumBandwidthIn += (i->second.kbytes_received / duration);
  630. sumBandwidthOut += (i->second.kbytes_sent / duration);
  631. senderCountIP++;
  632. }
  633. float avgPacketRate = (packetCount / duration);
  634. long avgPacketSize = getAvgPacketSize();
  635. long avgPacketsSentPerHost = (sumPacketsSent / senderCountIP);
  636. float avgBandwidthInKBits = (sumBandwidthIn / senderCountIP) * 8;
  637. float avgBandwidthOutInKBits = (sumBandwidthOut / senderCountIP) * 8;
  638. // Create database and write information
  639. statistics_db db(database_path);
  640. db.writeStatisticsFile(packetCount, getCaptureDurationSeconds(),
  641. getFormattedTimestamp(timestamp_firstPacket.seconds(), timestamp_firstPacket.microseconds()),
  642. getFormattedTimestamp(timestamp_lastPacket.seconds(), timestamp_lastPacket.microseconds()),
  643. avgPacketRate, avgPacketSize, avgPacketsSentPerHost, avgBandwidthInKBits,
  644. avgBandwidthOutInKBits);
  645. db.writeStatisticsIP(ip_statistics);
  646. db.writeStatisticsTTL(ttl_distribution);
  647. db.writeStatisticsIpMac(ip_mac_mapping);
  648. db.writeStatisticsMss(ip_sumMss);
  649. db.writeStatisticsPorts(ip_ports);
  650. db.writeStatisticsProtocols(protocol_distribution);
  651. // Aidmar
  652. db.writeStatisticsMss_dist(mss_distribution);
  653. db.writeStatisticsWin(win_distribution);
  654. db.writeStatisticsFlow(flow_statistics);
  655. }
  656. /**
  657. * Returns the average packet size.
  658. * @return a float indicating the average packet size in kbytes.
  659. */
  660. float statistics::getAvgPacketSize() const {
  661. // AvgPktSize = (Sum of all packet sizes / #Packets)
  662. return (sumPacketSize / packetCount) / 1024;
  663. }
  664. /**
  665. * Adds the size of a packet (to be used to calculate the avg. packet size).
  666. * @param packetSize The size of the current packet in bytes.
  667. */
  668. void statistics::addPacketSize(uint32_t packetSize) {
  669. sumPacketSize += ((float) packetSize);
  670. }