statistics.cpp 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867
  1. #include <iostream>
  2. #include <fstream>
  3. #include <vector>
  4. #include <math.h>
  5. #include "statistics.h"
  6. #include <sstream>
  7. #include <SQLiteCpp/SQLiteCpp.h>
  8. #include "statistics_db.h"
  9. #include "statistics.h"
  10. #include "utilities.h"
  11. using namespace Tins;
  12. /**
  13. * Checks if there is a payload and increments payloads counter.
  14. * @param pdu_l4 The packet that should be checked if it has a payload or not.
  15. */
  16. void statistics::checkPayload(const PDU *pdu_l4) {
  17. if(this->getDoExtraTests()) {
  18. // pdu_l4: Tarnsport layer 4
  19. int pktSize = pdu_l4->size();
  20. int headerSize = pdu_l4->header_size(); // TCP/UDP header
  21. int payloadSize = pktSize - headerSize;
  22. if (payloadSize > 0)
  23. payloadCount++;
  24. }
  25. }
  26. /**
  27. * Checks the correctness of TCP checksum and increments counter if the checksum was incorrect.
  28. * @param ipAddressSender The source IP.
  29. * @param ipAddressReceiver The destination IP.
  30. * @param tcpPkt The packet to get checked.
  31. */
  32. void statistics::checkTCPChecksum(const std::string &ipAddressSender, const std::string &ipAddressReceiver, TCP tcpPkt) {
  33. if(this->getDoExtraTests()) {
  34. if(check_tcpChecksum(ipAddressSender, ipAddressReceiver, tcpPkt))
  35. correctTCPChecksumCount++;
  36. else incorrectTCPChecksumCount++;
  37. }
  38. }
  39. /**
  40. * Calculates entropy of the source and destination IPs in a time interval.
  41. * @param intervalStartTimestamp The timstamp where the interval starts.
  42. * @return a vector: contains source IP entropy and destination IP entropy.
  43. */
  44. std::vector<float> statistics::calculateLastIntervalIPsEntropy(std::chrono::microseconds intervalStartTimestamp){
  45. if(this->getDoExtraTests()) {
  46. // TODO: change datastructures
  47. std::vector<long> IPsSrcPktsCounts;
  48. std::vector<long> IPsDstPktsCounts;
  49. std::vector<double> IPsSrcProb;
  50. std::vector<double> IPsDstProb;
  51. int pktsSent = 0, pktsReceived = 0;
  52. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  53. long IPsSrcPktsCount = 0;
  54. if (intervalCumIPStats.count(i->first) == 0) {
  55. IPsSrcPktsCount = i->second.pkts_sent;
  56. } else {
  57. IPsSrcPktsCount = i->second.pkts_sent-intervalCumIPStats[i->first].pkts_sent;
  58. }
  59. if(IPsSrcPktsCount != 0) {
  60. IPsSrcPktsCounts.push_back(IPsSrcPktsCount);
  61. pktsSent += IPsSrcPktsCount;
  62. }
  63. long IPsDstPktsCount = 0;
  64. if (intervalCumIPStats.count(i->first) == 0) {
  65. IPsDstPktsCount = i->second.pkts_received;
  66. } else {
  67. IPsDstPktsCount = i->second.pkts_received-intervalCumIPStats[i->first].pkts_received;
  68. }
  69. if(IPsDstPktsCount != 0) {
  70. IPsDstPktsCounts.push_back(IPsDstPktsCount);
  71. pktsReceived += IPsDstPktsCount;
  72. }
  73. }
  74. for (auto i = IPsSrcPktsCounts.begin(); i != IPsSrcPktsCounts.end(); i++) {
  75. IPsSrcProb.push_back(static_cast<double>(*i) / static_cast<double>(pktsSent));
  76. }
  77. for (auto i = IPsDstPktsCounts.begin(); i != IPsDstPktsCounts.end(); i++) {
  78. IPsDstProb.push_back(static_cast<double>(*i) / static_cast<double>(pktsReceived));
  79. }
  80. // Calculate IP source entropy
  81. double IPsSrcEntropy = 0;
  82. for (unsigned i = 0; i < IPsSrcProb.size(); i++) {
  83. if (IPsSrcProb[i] > 0)
  84. IPsSrcEntropy += -IPsSrcProb[i] * log2(IPsSrcProb[i]);
  85. }
  86. // Calculate IP destination entropy
  87. double IPsDstEntropy = 0;
  88. for (unsigned i = 0; i < IPsDstProb.size(); i++) {
  89. if (IPsDstProb[i] > 0)
  90. IPsDstEntropy += -IPsDstProb[i] * log2(IPsDstProb[i]);
  91. }
  92. // FIXME: return doubles not floats
  93. std::vector<float> entropies = {static_cast<float>(IPsSrcEntropy), static_cast<float>(IPsDstEntropy)};
  94. return entropies;
  95. }
  96. else {
  97. return {-1, -1};
  98. }
  99. }
  100. /**
  101. * Calculates the cumulative entropy of the source and destination IPs, i.e., the entropy for packets from the beginning of the pcap file.
  102. * @return a vector: contains the cumulative entropies of source and destination IPs
  103. */
  104. std::vector<float> statistics::calculateIPsCumEntropy(){
  105. if(this->getDoExtraTests()) {
  106. std::vector <std::string> IPs;
  107. std::vector <float> IPsSrcProb;
  108. std::vector <float> IPsDstProb;
  109. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  110. IPs.push_back(i->first);
  111. IPsSrcProb.push_back((float)i->second.pkts_sent/packetCount);
  112. IPsDstProb.push_back((float)i->second.pkts_received/packetCount);
  113. }
  114. // Calculate IP source entropy
  115. float IPsSrcEntropy = 0;
  116. for(unsigned i=0; i < IPsSrcProb.size();i++){
  117. if (IPsSrcProb[i] > 0)
  118. IPsSrcEntropy += - IPsSrcProb[i]*log2(IPsSrcProb[i]);
  119. }
  120. // Calculate IP destination entropy
  121. float IPsDstEntropy = 0;
  122. for(unsigned i=0; i < IPsDstProb.size();i++){
  123. if (IPsDstProb[i] > 0)
  124. IPsDstEntropy += - IPsDstProb[i]*log2(IPsDstProb[i]);
  125. }
  126. std::vector<float> entropies = {IPsSrcEntropy, IPsDstEntropy};
  127. return entropies;
  128. }
  129. else {
  130. return {-1, -1};
  131. }
  132. }
  133. /**
  134. * Calculates sending packet rate for each IP in a time interval. Finds min and max packet rate and adds them to ip_statistics map.
  135. * @param intervalStartTimestamp The timstamp where the interval starts.
  136. */
  137. void statistics::calculateIPIntervalPacketRate(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp){
  138. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  139. int IPsSrcPktsCount = 0;
  140. for (auto j = i->second.pkts_sent_timestamp.begin(); j != i->second.pkts_sent_timestamp.end(); j++) {
  141. if(*j >= intervalStartTimestamp)
  142. IPsSrcPktsCount++;
  143. }
  144. float interval_pkt_rate = (float) IPsSrcPktsCount * 1000000 / interval.count(); // used 10^6 because interval in microseconds
  145. i->second.interval_pkt_rate.push_back(interval_pkt_rate);
  146. if(interval_pkt_rate > i->second.max_interval_pkt_rate || i->second.max_interval_pkt_rate == 0)
  147. i->second.max_interval_pkt_rate = interval_pkt_rate;
  148. if(interval_pkt_rate < i->second.min_interval_pkt_rate || i->second.min_interval_pkt_rate == 0)
  149. i->second.min_interval_pkt_rate = interval_pkt_rate;
  150. }
  151. }
  152. /**
  153. * Calculates the entropies for the count of integer values.
  154. * @param current map containing the values with counts
  155. * @param an old map containing the values with counts (from last iteration)
  156. * @return a vector containing the calculated entropies: entropy of all updated values, entropy of all novel values
  157. */
  158. std::vector<double> statistics::calculateEntropies(std::unordered_map<int, int> &map, std::unordered_map<int, int> &old) {
  159. std::vector<double> counts;
  160. int count_total = 0;
  161. double entropy = 0.0;
  162. std::vector<double> novel_counts;
  163. int novel_count_total = 0;
  164. double novel_entropy = 0.0;
  165. // iterate over all values
  166. for (auto iter: map) {
  167. if (old.count(iter.first) == 0) {
  168. // count novel values
  169. double novel_count = static_cast<double>(iter.second);
  170. counts.push_back(novel_count);
  171. count_total += novel_count;
  172. novel_counts.push_back(novel_count);
  173. novel_count_total += novel_count;
  174. } else if (old.count(iter.first) != map.count(iter.first)) {
  175. // count all increased values
  176. double count = static_cast<double>(iter.second-old[iter.first]);
  177. if (count != 0.0) {
  178. counts.push_back(count);
  179. count_total += count;
  180. }
  181. }
  182. }
  183. // calculate entropy
  184. for (auto count: counts) {
  185. double prob = count / static_cast<double>(count_total);
  186. entropy += -1 * prob * log2(prob);
  187. }
  188. // calculate novelty entropy
  189. for (auto novel_count: novel_counts) {
  190. double novel_prob = novel_count / static_cast<double>(novel_count_total);
  191. novel_entropy += -1 * novel_prob * log2(novel_prob);
  192. }
  193. return {entropy, novel_entropy};
  194. }
  195. /**
  196. * Registers statistical data for a time interval.
  197. * @param intervalStartTimestamp The timstamp where the interval starts.
  198. * @param intervalEndTimestamp The timstamp where the interval ends.
  199. * @param previousPacketCount The total number of packets in last interval.
  200. */
  201. void statistics::addIntervalStat(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp, std::chrono::microseconds intervalEndTimestamp){
  202. // Add packet rate for each IP to ip_statistics map
  203. calculateIPIntervalPacketRate(interval, intervalStartTimestamp);
  204. std::vector<float> ipEntopies = calculateLastIntervalIPsEntropy(intervalStartTimestamp);
  205. std::vector<float> ipCumEntopies = calculateIPsCumEntropy();
  206. std::string lastPktTimestamp_s = std::to_string(intervalEndTimestamp.count());
  207. std::string intervalStartTimestamp_s = std::to_string(intervalStartTimestamp.count());
  208. // The intervalStartTimestamp_s is the previous interval lastPktTimestamp_s
  209. // TODO: check with carlos if first and last packet timestamps are alright
  210. interval_statistics[lastPktTimestamp_s].start = std::to_string(intervalStartTimestamp.count());
  211. interval_statistics[lastPktTimestamp_s].end = std::to_string(intervalEndTimestamp.count());
  212. interval_statistics[lastPktTimestamp_s].pkts_count = packetCount - intervalCumPktCount;
  213. interval_statistics[lastPktTimestamp_s].pkt_rate = static_cast<float>(interval_statistics[lastPktTimestamp_s].pkts_count) / (static_cast<double>(interval.count()) / 1000000);
  214. interval_statistics[lastPktTimestamp_s].kbytes = static_cast<float>(sumPacketSize - intervalCumSumPktSize) / 1024;
  215. interval_statistics[lastPktTimestamp_s].kbyte_rate = interval_statistics[lastPktTimestamp_s].kbytes / (static_cast<double>(interval.count()) / 1000000);
  216. interval_statistics[lastPktTimestamp_s].payload_count = payloadCount - intervalPayloadCount;
  217. interval_statistics[lastPktTimestamp_s].incorrect_tcp_checksum_count = incorrectTCPChecksumCount - intervalIncorrectTCPChecksumCount;
  218. interval_statistics[lastPktTimestamp_s].correct_tcp_checksum_count = correctTCPChecksumCount - intervalCorrectTCPChecksumCount;
  219. interval_statistics[lastPktTimestamp_s].novel_ip_count = static_cast<int>(ip_statistics.size()) - intervalCumNovelIPCount;
  220. interval_statistics[lastPktTimestamp_s].novel_ttl_count = static_cast<int>(ttl_values.size()) - intervalCumNovelTTLCount;
  221. interval_statistics[lastPktTimestamp_s].novel_win_size_count = static_cast<int>(win_values.size()) - intervalCumNovelWinSizeCount;
  222. interval_statistics[lastPktTimestamp_s].novel_tos_count = static_cast<int>(tos_values.size()) - intervalCumNovelToSCount;
  223. interval_statistics[lastPktTimestamp_s].novel_mss_count = static_cast<int>(mss_values.size()) - intervalCumNovelMSSCount;
  224. interval_statistics[lastPktTimestamp_s].novel_port_count = static_cast<int>(port_values.size()) - intervalCumNovelPortCount;
  225. interval_statistics[lastPktTimestamp_s].ttl_entropies = calculateEntropies(ttl_values, intervalCumTTLValues);
  226. interval_statistics[lastPktTimestamp_s].win_size_entropies = calculateEntropies(win_values, intervalCumWinSizeValues);
  227. interval_statistics[lastPktTimestamp_s].tos_entropies = calculateEntropies(tos_values, intervalCumTosValues);
  228. interval_statistics[lastPktTimestamp_s].mss_entropies = calculateEntropies(mss_values, intervalCumMSSValues);
  229. interval_statistics[lastPktTimestamp_s].port_entropies = calculateEntropies(port_values, intervalCumPortValues);
  230. intervalPayloadCount = payloadCount;
  231. intervalIncorrectTCPChecksumCount = incorrectTCPChecksumCount;
  232. intervalCorrectTCPChecksumCount = correctTCPChecksumCount;
  233. intervalCumPktCount = packetCount;
  234. intervalCumSumPktSize = sumPacketSize;
  235. intervalCumNovelIPCount = static_cast<int>(ip_statistics.size());
  236. intervalCumNovelTTLCount = static_cast<int>(ttl_values.size());
  237. intervalCumNovelWinSizeCount = static_cast<int>(win_values.size());
  238. intervalCumNovelToSCount =static_cast<int>(tos_values.size());
  239. intervalCumNovelMSSCount = static_cast<int>(mss_values.size());
  240. intervalCumNovelPortCount = static_cast<int>(port_values.size());
  241. intervalCumIPStats = ip_statistics;
  242. intervalCumTTLValues = ttl_values;
  243. intervalCumWinSizeValues = win_values;
  244. intervalCumTosValues = tos_values;
  245. intervalCumMSSValues = mss_values;
  246. intervalCumPortValues = port_values;
  247. if(ipEntopies.size()>1){
  248. interval_statistics[lastPktTimestamp_s].ip_src_entropy = ipEntopies[0];
  249. interval_statistics[lastPktTimestamp_s].ip_dst_entropy = ipEntopies[1];
  250. }
  251. if(ipCumEntopies.size()>1){
  252. interval_statistics[lastPktTimestamp_s].ip_src_cum_entropy = ipCumEntopies[0];
  253. interval_statistics[lastPktTimestamp_s].ip_dst_cum_entropy = ipCumEntopies[1];
  254. }
  255. }
  256. /**
  257. * Registers statistical data for a sent packet in a given conversation (two IPs, two ports).
  258. * Increments the counter packets_A_B or packets_B_A.
  259. * Adds the timestamp of the packet in pkts_A_B_timestamp or pkts_B_A_timestamp.
  260. * @param ipAddressSender The sender IP address.
  261. * @param sport The source port.
  262. * @param ipAddressReceiver The receiver IP address.
  263. * @param dport The destination port.
  264. * @param timestamp The timestamp of the packet.
  265. */
  266. void statistics::addConvStat(const std::string &ipAddressSender,int sport,const std::string &ipAddressReceiver,int dport, std::chrono::microseconds timestamp){
  267. conv f1 = {ipAddressReceiver, dport, ipAddressSender, sport};
  268. conv f2 = {ipAddressSender, sport, ipAddressReceiver, dport};
  269. // if already exist A(ipAddressReceiver, dport), B(ipAddressSender, sport) conversation
  270. if (conv_statistics.count(f1)>0){
  271. conv_statistics[f1].pkts_count++;
  272. if(conv_statistics[f1].pkts_count<=3)
  273. conv_statistics[f1].interarrival_time.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - conv_statistics[f1].pkts_timestamp.back()));
  274. conv_statistics[f1].pkts_timestamp.push_back(timestamp);
  275. }
  276. // Add new conversation A(ipAddressSender, sport), B(ipAddressReceiver, dport)
  277. else{
  278. conv_statistics[f2].pkts_count++;
  279. if(conv_statistics[f2].pkts_timestamp.size()>0 && conv_statistics[f2].pkts_count<=3 )
  280. conv_statistics[f2].interarrival_time.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - conv_statistics[f2].pkts_timestamp.back()));
  281. conv_statistics[f2].pkts_timestamp.push_back(timestamp);
  282. }
  283. }
  284. /**
  285. * Registers statistical data for a sent packet in a given extended conversation (two IPs, two ports, protocol).
  286. * Increments the counter packets_A_B or packets_B_A.
  287. * Adds the timestamp of the packet in pkts_A_B_timestamp or pkts_B_A_timestamp.
  288. * Updates all other statistics of conv_statistics_extended
  289. * @param ipAddressSender The sender IP address.
  290. * @param sport The source port.
  291. * @param ipAddressReceiver The receiver IP address.
  292. * @param dport The destination port.
  293. * @param protocol The used protocol.
  294. * @param timestamp The timestamp of the packet.
  295. */
  296. void statistics::addConvStatExt(const std::string &ipAddressSender,int sport,const std::string &ipAddressReceiver,int dport,const std::string &protocol, std::chrono::microseconds timestamp){
  297. if(this->getDoExtraTests()) {
  298. convWithProt f1 = {ipAddressReceiver, dport, ipAddressSender, sport, protocol};
  299. convWithProt f2 = {ipAddressSender, sport, ipAddressReceiver, dport, protocol};
  300. convWithProt f;
  301. // if there already exists a communication interval for the specified conversation
  302. if (conv_statistics_extended.count(f1) > 0 || conv_statistics_extended.count(f2) > 0){
  303. // find out which direction of conversation is contained in conv_statistics_extended
  304. if (conv_statistics_extended.count(f1) > 0)
  305. f = f1;
  306. else
  307. f = f2;
  308. // increase pkts count and check on delay
  309. conv_statistics_extended[f].pkts_count++;
  310. if (conv_statistics_extended[f].pkts_timestamp.size()>0 && conv_statistics_extended[f].pkts_count<=3)
  311. conv_statistics_extended[f].interarrival_time.push_back(std::chrono::duration_cast<std::chrono::microseconds> (timestamp - conv_statistics_extended[f].pkts_timestamp.back()));
  312. conv_statistics_extended[f].pkts_timestamp.push_back(timestamp);
  313. // if the time difference has exceeded the threshold, create a new interval with this message
  314. if (timestamp - conv_statistics_extended[f].comm_intervals.back().end > (std::chrono::microseconds) ((unsigned long) COMM_INTERVAL_THRESHOLD)) { // > or >= ?
  315. commInterval new_interval = {timestamp, timestamp, 1};
  316. conv_statistics_extended[f].comm_intervals.push_back(new_interval);
  317. }
  318. // otherwise, set the time of the last interval message to the current timestamp and increase interval packet count by 1
  319. else{
  320. conv_statistics_extended[f].comm_intervals.back().end = timestamp;
  321. conv_statistics_extended[f].comm_intervals.back().pkts_count++;
  322. }
  323. }
  324. // if there does not exist a communication interval for the specified conversation
  325. else{
  326. // add initial interval entry for this conversation
  327. commInterval initial_interval = {timestamp, timestamp, 1};
  328. entry_convStatExt entry;
  329. entry.comm_intervals.push_back(initial_interval);
  330. entry.pkts_count = 1;
  331. entry.pkts_timestamp.push_back(timestamp);
  332. conv_statistics_extended[f2] = entry;
  333. }
  334. }
  335. }
  336. /**
  337. * Aggregate the collected information about all communication intervals within conv_statistics_extended of every conversation.
  338. * Do this by computing the average packet rate per interval and the average time between intervals.
  339. * Also compute average interval duration and total communication duration (i.e. last_msg.time - first_msg.time)
  340. */
  341. void statistics::createCommIntervalStats(){
  342. // iterate over all <convWithProt, entry_convStatExt> pairs
  343. for (auto &cur_elem : conv_statistics_extended) {
  344. entry_convStatExt &entry = cur_elem.second;
  345. std::vector<commInterval> &intervals = entry.comm_intervals;
  346. // if there is only one interval, the time between intervals cannot be computed and is therefore set to 0
  347. if (intervals.size() == 1){
  348. double interval_duration = (double) (intervals[0].end - intervals[0].start).count() / (double) 1e6;
  349. entry.avg_int_pkts_count = (double) intervals[0].pkts_count;
  350. entry.avg_time_between_ints = (double) 0;
  351. entry.avg_interval_time = interval_duration;
  352. }
  353. // If there is more than one interval, compute the specified averages
  354. else if (intervals.size() > 1){
  355. long summed_pkts_count = intervals[0].pkts_count;
  356. std::chrono::microseconds time_between_ints_sum = (std::chrono::microseconds) 0;
  357. std::chrono::microseconds summed_int_duration = intervals[0].end - intervals[0].start;
  358. for (std::size_t i = 1; i < intervals.size(); i++) {
  359. summed_pkts_count += intervals[i].pkts_count;
  360. summed_int_duration += intervals[i].end - intervals[i].start;
  361. time_between_ints_sum += intervals[i].start - intervals[i - 1].end;
  362. }
  363. entry.avg_int_pkts_count = summed_pkts_count / ((double) intervals.size());
  364. entry.avg_time_between_ints = (time_between_ints_sum.count() / (double) (intervals.size() - 1)) / (double) 1e6;
  365. entry.avg_interval_time = (summed_int_duration.count() / (double) intervals.size()) / (double) 1e6;
  366. }
  367. entry.total_comm_duration = (double) (entry.pkts_timestamp.back() - entry.pkts_timestamp.front()).count() / (double) 1e6;
  368. }
  369. }
  370. /**
  371. * Increments the packet counter for the given IP address and MSS value.
  372. * @param ipAddress The IP address whose MSS packet counter should be incremented.
  373. * @param mssValue The MSS value of the packet.
  374. */
  375. void statistics::incrementMSScount(const std::string &ipAddress, int mssValue) {
  376. mss_values[mssValue]++;
  377. mss_distribution[{ipAddress, mssValue}]++;
  378. }
  379. /**
  380. * Increments the packet counter for the given IP address and window size.
  381. * @param ipAddress The IP address whose window size packet counter should be incremented.
  382. * @param winSize The window size of the packet.
  383. */
  384. void statistics::incrementWinCount(const std::string &ipAddress, int winSize) {
  385. win_values[winSize]++;
  386. win_distribution[{ipAddress, winSize}]++;
  387. }
  388. /**
  389. * Increments the packet counter for the given IP address and TTL value.
  390. * @param ipAddress The IP address whose TTL packet counter should be incremented.
  391. * @param ttlValue The TTL value of the packet.
  392. */
  393. void statistics::incrementTTLcount(const std::string &ipAddress, int ttlValue) {
  394. ttl_values[ttlValue]++;
  395. ttl_distribution[{ipAddress, ttlValue}]++;
  396. }
  397. /**
  398. * Increments the packet counter for the given IP address and ToS value.
  399. * @param ipAddress The IP address whose ToS packet counter should be incremented.
  400. * @param tosValue The ToS value of the packet.
  401. */
  402. void statistics::incrementToScount(const std::string &ipAddress, int tosValue) {
  403. tos_values[tosValue]++;
  404. tos_distribution[{ipAddress, tosValue}]++;
  405. }
  406. /**
  407. * Increments the protocol counter for the given IP address and protocol.
  408. * @param ipAddress The IP address whose protocol packet counter should be incremented.
  409. * @param protocol The protocol of the packet.
  410. */
  411. void statistics::incrementProtocolCount(const std::string &ipAddress, const std::string &protocol) {
  412. protocol_distribution[{ipAddress, protocol}].count++;
  413. }
  414. /**
  415. * Returns the number of packets seen for the given IP address and protocol.
  416. * @param ipAddress The IP address whose packet count is wanted.
  417. * @param protocol The protocol whose packet count is wanted.
  418. */
  419. int statistics::getProtocolCount(const std::string &ipAddress, const std::string &protocol) {
  420. return protocol_distribution[{ipAddress, protocol}].count;
  421. }
  422. /**
  423. * Increases the byte counter for the given IP address and protocol.
  424. * @param ipAddress The IP address whose protocol byte counter should be increased.
  425. * @param protocol The protocol of the packet.
  426. * @param byteSent The packet's size.
  427. */
  428. void statistics::increaseProtocolByteCount(const std::string &ipAddress, const std::string &protocol, long bytesSent) {
  429. protocol_distribution[{ipAddress, protocol}].byteCount += bytesSent;
  430. }
  431. /**
  432. * Returns the number of bytes seen for the given IP address and protocol.
  433. * @param ipAddress The IP address whose byte count is wanted.
  434. * @param protocol The protocol whose byte count is wanted.
  435. * @return a float: The number of bytes
  436. */
  437. float statistics::getProtocolByteCount(const std::string &ipAddress, const std::string &protocol) {
  438. return protocol_distribution[{ipAddress, protocol}].byteCount;
  439. }
  440. /**
  441. * Increments the packet counter for
  442. * - the given sender IP address with outgoing port and
  443. * - the given receiver IP address with incoming port.
  444. * @param ipAddressSender The IP address of the packet sender.
  445. * @param outgoingPort The port used by the sender.
  446. * @param ipAddressReceiver The IP address of the packet receiver.
  447. * @param incomingPort The port used by the receiver.
  448. */
  449. void statistics::incrementPortCount(const std::string &ipAddressSender, int outgoingPort, const std::string &ipAddressReceiver,
  450. int incomingPort, const std::string &protocol) {
  451. port_values[outgoingPort]++;
  452. port_values[incomingPort]++;
  453. ip_ports[{ipAddressSender, "out", outgoingPort, protocol}].count++;
  454. ip_ports[{ipAddressReceiver, "in", incomingPort, protocol}].count++;
  455. }
  456. /**
  457. * Increases the packet byte counter for
  458. * - the given sender IP address with outgoing port and
  459. * - the given receiver IP address with incoming port.
  460. * @param ipAddressSender The IP address of the packet sender.
  461. * @param outgoingPort The port used by the sender.
  462. * @param ipAddressReceiver The IP address of the packet receiver.
  463. * @param incomingPort The port used by the receiver.
  464. * @param byteSent The packet's size.
  465. */
  466. void statistics::increasePortByteCount(const std::string &ipAddressSender, int outgoingPort, const std::string &ipAddressReceiver,
  467. int incomingPort, long bytesSent, const std::string &protocol) {
  468. ip_ports[{ipAddressSender, "out", outgoingPort, protocol}].byteCount += bytesSent;
  469. ip_ports[{ipAddressReceiver, "in", incomingPort, protocol}].byteCount += bytesSent;
  470. }
  471. /**
  472. * Increments the packet counter for
  473. * - the given sender MAC address and
  474. * - the given receiver MAC address.
  475. * @param srcMac The MAC address of the packet sender.
  476. * @param dstMac The MAC address of the packet receiver.
  477. * @param typeNumber The payload type number of the packet.
  478. */
  479. void statistics::incrementUnrecognizedPDUCount(const std::string &srcMac, const std::string &dstMac, uint32_t typeNumber,
  480. const std::string &timestamp) {
  481. unrecognized_PDUs[{srcMac, dstMac, typeNumber}].count++;
  482. unrecognized_PDUs[{srcMac, dstMac, typeNumber}].timestamp_last_occurrence = timestamp;
  483. }
  484. /**
  485. * Creates a new statistics object.
  486. */
  487. statistics::statistics(std::string resourcePath) {;
  488. this->resourcePath = resourcePath;
  489. }
  490. /**
  491. * Stores the assignment IP address -> MAC address.
  492. * @param ipAddress The IP address belonging to the given MAC address.
  493. * @param macAddress The MAC address belonging to the given IP address.
  494. */
  495. void statistics::assignMacAddress(const std::string &ipAddress, const std::string &macAddress) {
  496. ip_mac_mapping[ipAddress] = macAddress;
  497. }
  498. /**
  499. * Registers statistical data for a sent packet. Increments the counter packets_sent for the sender and
  500. * packets_received for the receiver. Adds the bytes as kbytes_sent (sender) and kybtes_received (receiver).
  501. * @param ipAddressSender The IP address of the packet sender.
  502. * @param ipAddressReceiver The IP address of the packet receiver.
  503. * @param bytesSent The packet's size.
  504. */
  505. void statistics::addIpStat_packetSent(const std::string &ipAddressSender, const std::string &ipAddressReceiver, long bytesSent, std::chrono::microseconds timestamp) {
  506. // Adding IP as a sender for first time
  507. if(ip_statistics[ipAddressSender].pkts_sent==0){
  508. // Add the IP class
  509. ip_statistics[ipAddressSender].ip_class = getIPv4Class(ipAddressSender);
  510. }
  511. // Adding IP as a receiver for first time
  512. if(ip_statistics[ipAddressReceiver].pkts_received==0){
  513. // Add the IP class
  514. ip_statistics[ipAddressReceiver].ip_class = getIPv4Class(ipAddressReceiver);
  515. }
  516. // Update stats for packet sender
  517. ip_statistics[ipAddressSender].kbytes_sent += (float(bytesSent) / 1024);
  518. ip_statistics[ipAddressSender].pkts_sent++;
  519. ip_statistics[ipAddressSender].pkts_sent_timestamp.push_back(timestamp);
  520. // Update stats for packet receiver
  521. ip_statistics[ipAddressReceiver].kbytes_received += (float(bytesSent) / 1024);
  522. ip_statistics[ipAddressReceiver].pkts_received++;
  523. ip_statistics[ipAddressReceiver].pkts_received_timestamp.push_back(timestamp);
  524. if(this->getDoExtraTests()) {
  525. // Increment Degrees for sender and receiver, if Sender sends its first packet to this receiver
  526. std::unordered_set<std::string>::const_iterator found_receiver = contacted_ips[ipAddressSender].find(ipAddressReceiver);
  527. if(found_receiver == contacted_ips[ipAddressSender].end()){
  528. // Receiver is NOT contained in the List of IPs, that the Sender has contacted, therefore this is the first packet in this direction
  529. ip_statistics[ipAddressSender].out_degree++;
  530. ip_statistics[ipAddressReceiver].in_degree++;
  531. // Increment overall_degree only if this is the first packet for the connection (both directions)
  532. // Therefore check, whether Receiver has contacted Sender before
  533. std::unordered_set<std::string>::const_iterator sender_contacted = contacted_ips[ipAddressReceiver].find(ipAddressSender);
  534. if(sender_contacted == contacted_ips[ipAddressReceiver].end()){
  535. ip_statistics[ipAddressSender].overall_degree++;
  536. ip_statistics[ipAddressReceiver].overall_degree++;
  537. }
  538. contacted_ips[ipAddressSender].insert(ipAddressReceiver);
  539. }
  540. }
  541. }
  542. /**
  543. * Setter for the timestamp_firstPacket field.
  544. * @param ts The timestamp of the first packet in the PCAP file.
  545. */
  546. void statistics::setTimestampFirstPacket(Tins::Timestamp ts) {
  547. timestamp_firstPacket = ts;
  548. }
  549. /**
  550. * Setter for the timestamp_lastPacket field.
  551. * @param ts The timestamp of the last packet in the PCAP file.
  552. */
  553. void statistics::setTimestampLastPacket(Tins::Timestamp ts) {
  554. timestamp_lastPacket = ts;
  555. }
  556. /**
  557. * Getter for the timestamp_firstPacket field.
  558. */
  559. Tins::Timestamp statistics::getTimestampFirstPacket() {
  560. return timestamp_firstPacket;
  561. }
  562. /**
  563. * Getter for the timestamp_lastPacket field.
  564. */
  565. Tins::Timestamp statistics::getTimestampLastPacket() {
  566. return timestamp_lastPacket;
  567. }
  568. /**
  569. * Getter for the packetCount field.
  570. */
  571. int statistics::getPacketCount() {
  572. return packetCount;
  573. }
  574. /**
  575. * Getter for the sumPacketSize field.
  576. */
  577. int statistics::getSumPacketSize() {
  578. return sumPacketSize;
  579. }
  580. /**
  581. * Returns the average packet size.
  582. * @return a float indicating the average packet size in kbytes.
  583. */
  584. float statistics::getAvgPacketSize() const {
  585. // AvgPktSize = (Sum of all packet sizes / #Packets)
  586. return (sumPacketSize / packetCount) / 1024;
  587. }
  588. /**
  589. * Adds the size of a packet (to be used to calculate the avg. packet size).
  590. * @param packetSize The size of the current packet in bytes.
  591. */
  592. void statistics::addPacketSize(uint32_t packetSize) {
  593. sumPacketSize += ((float) packetSize);
  594. }
  595. /**
  596. * Setter for the doExtraTests field.
  597. */
  598. void statistics::setDoExtraTests(bool var) {
  599. doExtraTests = var;
  600. }
  601. /**
  602. * Getter for the doExtraTests field.
  603. */
  604. bool statistics::getDoExtraTests() {
  605. return doExtraTests;
  606. }
  607. /**
  608. * Calculates the capture duration.
  609. * @return a formatted string HH:MM:SS.mmmmmm with
  610. * HH: hour, MM: minute, SS: second, mmmmmm: microseconds
  611. */
  612. std::string statistics::getCaptureDurationTimestamp() const {
  613. // Calculate duration
  614. timeval fp, lp, d;
  615. fp.tv_sec = timestamp_firstPacket.seconds();
  616. fp.tv_usec = timestamp_firstPacket.microseconds();
  617. lp.tv_sec = timestamp_lastPacket.seconds();
  618. lp.tv_usec = timestamp_lastPacket.microseconds();
  619. timersub(&lp, &fp, &d);
  620. long int hour = d.tv_sec / 3600;
  621. long int remainder = (d.tv_sec - hour * 3600);
  622. long int minute = remainder / 60;
  623. long int second = (remainder - minute * 60) % 60;
  624. long int microseconds = d.tv_usec;
  625. // Build desired output format: YYYY-mm-dd hh:mm:ss
  626. char out[64];
  627. sprintf(out, "%02ld:%02ld:%02ld.%06ld ", hour, minute, second, microseconds);
  628. return std::string(out);
  629. }
  630. /**
  631. * Calculates the capture duration.
  632. * @return a formatted string SS.mmmmmm with
  633. * S: seconds (UNIX time), mmmmmm: microseconds
  634. */
  635. float statistics::getCaptureDurationSeconds() const {
  636. timeval fp, lp, d;
  637. fp.tv_sec = timestamp_firstPacket.seconds();
  638. fp.tv_usec = timestamp_firstPacket.microseconds();
  639. lp.tv_sec = timestamp_lastPacket.seconds();
  640. lp.tv_usec = timestamp_lastPacket.microseconds();
  641. timersub(&lp, &fp, &d);
  642. char buf[64];
  643. snprintf(buf, sizeof(buf), "%u.%06u", static_cast<uint>(d.tv_sec), static_cast<uint>(d.tv_usec));
  644. return std::stof(std::string(buf));
  645. }
  646. /**
  647. * Creates a timestamp based on a time_t seconds (UNIX time format) and microseconds.
  648. * @param seconds
  649. * @param microseconds
  650. * @return a formatted string Y-m-d H:M:S.m with
  651. * Y: year, m: month, d: day, H: hour, M: minute, S: second, m: microseconds
  652. */
  653. std::string statistics::getFormattedTimestamp(time_t seconds, suseconds_t microseconds) const {
  654. timeval tv;
  655. tv.tv_sec = seconds;
  656. tv.tv_usec = microseconds;
  657. char tmbuf[20], buf[64];
  658. auto nowtm = gmtime(&(tv.tv_sec));
  659. strftime(tmbuf, sizeof(tmbuf), "%Y-%m-%d %H:%M:%S", nowtm);
  660. snprintf(buf, sizeof(buf), "%s.%06u", tmbuf, static_cast<uint>(tv.tv_usec));
  661. return std::string(buf);
  662. }
  663. /**
  664. * Calculates the statistics for a given IP address.
  665. * @param ipAddress The IP address whose statistics should be calculated.
  666. * @return a ip_stats struct containing statistical data derived by the statistical data collected.
  667. */
  668. ip_stats statistics::getStatsForIP(const std::string &ipAddress) {
  669. float duration = getCaptureDurationSeconds();
  670. entry_ipStat ipStatEntry = ip_statistics[ipAddress];
  671. ip_stats s;
  672. s.bandwidthKBitsIn = (ipStatEntry.kbytes_received / duration) * 8;
  673. s.bandwidthKBitsOut = (ipStatEntry.kbytes_sent / duration) * 8;
  674. s.packetPerSecondIn = (ipStatEntry.pkts_received / duration);
  675. s.packetPerSecondOut = (ipStatEntry.pkts_sent / duration);
  676. s.AvgPacketSizeSent = (ipStatEntry.kbytes_sent / ipStatEntry.pkts_sent);
  677. s.AvgPacketSizeRecv = (ipStatEntry.kbytes_received / ipStatEntry.pkts_received);
  678. return s;
  679. }
  680. int statistics::getDefaultInterval() {
  681. return this->default_interval;
  682. }
  683. void statistics::setDefaultInterval(int interval) {
  684. this->default_interval = interval;
  685. }
  686. /**
  687. * Increments the packet counter.
  688. */
  689. void statistics::incrementPacketCount() {
  690. packetCount++;
  691. }
  692. /**
  693. * Prints the statistics of the PCAP and IP specific statistics for the given IP address.
  694. * @param ipAddress The IP address whose statistics should be printed. Can be empty "" to print only general file statistics.
  695. */
  696. void statistics::printStats(const std::string &ipAddress) {
  697. std::stringstream ss;
  698. ss << std::endl;
  699. ss << "Capture duration: " << getCaptureDurationSeconds() << " seconds" << std::endl;
  700. ss << "Capture duration (HH:MM:SS.mmmmmm): " << getCaptureDurationTimestamp() << std::endl;
  701. ss << "#Packets: " << packetCount << std::endl;
  702. ss << std::endl;
  703. // Print IP address specific statistics only if IP address was given
  704. if (ipAddress != "") {
  705. entry_ipStat e = ip_statistics[ipAddress];
  706. ss << "\n----- STATS FOR IP ADDRESS [" << ipAddress << "] -------" << std::endl;
  707. ss << std::endl << "KBytes sent: " << e.kbytes_sent << std::endl;
  708. ss << "KBytes received: " << e.kbytes_received << std::endl;
  709. ss << "Packets sent: " << e.pkts_sent << std::endl;
  710. ss << "Packets received: " << e.pkts_received << "\n\n";
  711. ip_stats is = getStatsForIP(ipAddress);
  712. ss << "Bandwidth IN: " << is.bandwidthKBitsIn << " kbit/s" << std::endl;
  713. ss << "Bandwidth OUT: " << is.bandwidthKBitsOut << " kbit/s" << std::endl;
  714. ss << "Packets per second IN: " << is.packetPerSecondIn << std::endl;
  715. ss << "Packets per second OUT: " << is.packetPerSecondOut << std::endl;
  716. ss << "Avg Packet Size Sent: " << is.AvgPacketSizeSent << " kbytes" << std::endl;
  717. ss << "Avg Packet Size Received: " << is.AvgPacketSizeRecv << " kbytes" << std::endl;
  718. }
  719. std::cout << ss.str();
  720. }
  721. /**
  722. * Derives general PCAP file statistics from the collected statistical data and
  723. * writes all data into a SQLite database, located at database_path.
  724. * @param database_path The path of the SQLite database file ending with .sqlite3.
  725. */
  726. void statistics::writeToDatabase(std::string database_path, std::vector<std::chrono::duration<int, std::micro>> timeIntervals, bool del) {
  727. // Generate general file statistics
  728. float duration = getCaptureDurationSeconds();
  729. long sumPacketsSent = 0, senderCountIP = 0;
  730. float sumBandwidthIn = 0.0, sumBandwidthOut = 0.0;
  731. for (auto i = ip_statistics.begin(); i != ip_statistics.end(); i++) {
  732. sumPacketsSent += i->second.pkts_sent;
  733. // Consumed bandwith (bytes) for sending packets
  734. sumBandwidthIn += (i->second.kbytes_received / duration);
  735. sumBandwidthOut += (i->second.kbytes_sent / duration);
  736. senderCountIP++;
  737. }
  738. float avgPacketRate = (packetCount / duration);
  739. long avgPacketSize = getAvgPacketSize();
  740. if(senderCountIP>0) {
  741. long avgPacketsSentPerHost = (sumPacketsSent / senderCountIP);
  742. float avgBandwidthInKBits = (sumBandwidthIn / senderCountIP) * 8;
  743. float avgBandwidthOutInKBits = (sumBandwidthOut / senderCountIP) * 8;
  744. // Create database and write information
  745. statistics_db db(database_path, resourcePath);
  746. db.writeStatisticsFile(packetCount, getCaptureDurationSeconds(),
  747. getFormattedTimestamp(timestamp_firstPacket.seconds(), timestamp_firstPacket.microseconds()),
  748. getFormattedTimestamp(timestamp_lastPacket.seconds(), timestamp_lastPacket.microseconds()),
  749. avgPacketRate, avgPacketSize, avgPacketsSentPerHost, avgBandwidthInKBits,
  750. avgBandwidthOutInKBits, doExtraTests);
  751. db.writeStatisticsIP(ip_statistics);
  752. db.writeStatisticsTTL(ttl_distribution);
  753. db.writeStatisticsIpMac(ip_mac_mapping);
  754. db.writeStatisticsDegree(ip_statistics);
  755. db.writeStatisticsPorts(ip_ports);
  756. db.writeStatisticsProtocols(protocol_distribution);
  757. db.writeStatisticsMSS(mss_distribution);
  758. db.writeStatisticsToS(tos_distribution);
  759. db.writeStatisticsWin(win_distribution);
  760. db.writeStatisticsConv(conv_statistics);
  761. db.writeStatisticsConvExt(conv_statistics_extended);
  762. db.writeStatisticsInterval(interval_statistics, timeIntervals, del, this->default_interval, this->getDoExtraTests());
  763. db.writeDbVersion();
  764. db.writeStatisticsUnrecognizedPDUs(unrecognized_PDUs);
  765. }
  766. else {
  767. // Tinslib failed to recognize the types of the packets in the input PCAP
  768. std::cerr<<"ERROR: Statistics could not be collected from the input PCAP!"<<"\n";
  769. return;
  770. }
  771. }
  772. void statistics::writeIntervalsToDatabase(std::string database_path, std::vector<std::chrono::duration<int, std::micro>> timeIntervals, bool del) {
  773. statistics_db db(database_path, resourcePath);
  774. db.writeStatisticsInterval(interval_statistics, timeIntervals, del, this->default_interval, this->getDoExtraTests());
  775. }