statistics.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. /*
  2. * Class providing containers and access methods for statistical data collection.
  3. */
  4. #ifndef CPP_PCAPREADER_STATISTICS_H
  5. #define CPP_PCAPREADER_STATISTICS_H
  6. // Aidmar
  7. #include <vector>
  8. #include <unordered_map>
  9. #include <list>
  10. #include <tuple>
  11. #include <tins/timestamp.h>
  12. #include <tins/ip_address.h>
  13. #include "utilities.h"
  14. /*
  15. * Definition of structs used in unordered_map fields
  16. */
  /*
   * Per-IP summary returned by statistics::getStatsForIP. Fields:
   * - bandwidthKBitsIn:     incoming bandwidth in KBits
   * - bandwidthKBitsOut:    outgoing bandwidth in KBits
   * - packetPerSecondIn:    number of incoming packets per second
   * - packetPerSecondOut:   number of outgoing packets per second
   * - AvgPacketSizeSent:    average size of sent packets in kbytes
   * - AvgPacketSizeRecv:    average size of received packets in kbytes
   * - AvgMaxSegmentSizeTCP: average value of the TCP option Maximum Segment Size (MSS)
   *
   * Every member carries a default initializer so that a default-constructed
   * ip_stats holds zeros rather than indeterminate values.
   */
  struct ip_stats {
      float bandwidthKBitsIn = 0.0f;
      float bandwidthKBitsOut = 0.0f;
      float packetPerSecondIn = 0.0f;
      float packetPerSecondOut = 0.0f;
      float AvgPacketSizeSent = 0.0f;
      float AvgPacketSizeRecv = 0.0f;
      long AvgMaxSegmentSizeTCP = 0;
  };
  36. // Aidmar
  /*
   * Key describing one conversation (conv) through its two endpoints:
   * - ipAddressA / portA: IP address and port of endpoint A
   * - ipAddressB / portB: IP address and port of endpoint B
   */
  struct conv {
      std::string ipAddressA;
      int portA;
      std::string ipAddressB;
      int portB;

      // Field-by-field equality: A is only ever compared with the other
      // object's A, and B with its B.
      bool operator==(const conv &other) const {
          return std::tie(ipAddressA, portA, ipAddressB, portB)
                 == std::tie(other.ipAddressA, other.portA, other.ipAddressB, other.portB);
      }
  };
  56. // Aidmar
  /*
   * Key pairing:
   * - ipAddress: IP address (IPv4 or IPv6)
   * - mssValue:  MSS value
   */
  struct ipAddress_mss {
      std::string ipAddress;
      int mssValue;

      // Equal only when both the address and the MSS value match.
      bool operator==(const ipAddress_mss &other) const {
          return std::tie(ipAddress, mssValue)
                 == std::tie(other.ipAddress, other.mssValue);
      }
  };
  70. // Aidmar
  /*
   * Key pairing:
   * - ipAddress: IP address (IPv4 or IPv6)
   * - winSize:   window size
   */
  struct ipAddress_win {
      std::string ipAddress;
      int winSize;

      // Equal only when both the address and the window size match.
      bool operator==(const ipAddress_win &other) const {
          return std::tie(ipAddress, winSize)
                 == std::tie(other.ipAddress, other.winSize);
      }
  };
  /*
   * Key pairing:
   * - ipAddress: IP address (IPv4 or IPv6)
   * - ttlValue:  TTL value
   */
  struct ipAddress_ttl {
      std::string ipAddress;
      int ttlValue;

      // Equal only when both the address and the TTL value match.
      bool operator==(const ipAddress_ttl &other) const {
          return std::tie(ipAddress, ttlValue)
                 == std::tie(other.ipAddress, other.ttlValue);
      }
  };
  /*
   * Key pairing:
   * - ipAddress: IP address (IPv4 or IPv6)
   * - protocol:  protocol name (e.g. TCP, UDP, IPv4, IPv6)
   */
  struct ipAddress_protocol {
      std::string ipAddress;
      std::string protocol;

      // Equal only when both the address and the protocol string match.
      bool operator==(const ipAddress_protocol &other) const {
          return std::tie(ipAddress, protocol)
                 == std::tie(other.ipAddress, other.protocol);
      }
  };
  /*
   * Per-IP statistics record:
   * - pkts_received / pkts_sent:       number of received / sent packets
   * - kbytes_received / kbytes_sent:   data received / sent in kbytes
   * - ip_class:                        class label of the IP address
   * - interval_pkt_rate:               packet-rate samples, with max_pkt_rate and
   *                                    min_pkt_rate alongside
   * - firstAppearAs*PktCount and
   *   source/destinationAnomalyScore:  used to calculate the Mahoney anomaly score
   * - pktsSentTimestamp /
   *   pktsReceivedTimestamp:           timestamps used to collect statistics over
   *                                    a time interval
   */
  struct entry_ipStat {
      long pkts_received;
      long pkts_sent;
      float kbytes_received;
      float kbytes_sent;
      // Aidmar
      std::string ip_class;
      std::vector<float> interval_pkt_rate;
      float max_pkt_rate;
      float min_pkt_rate;
      // Aidmar - to calculate Mahoney anomaly score
      long firstAppearAsSenderPktCount;
      long firstAppearAsReceiverPktCount;
      float sourceAnomalyScore;
      float destinationAnomalyScore;
      // Aidmar - to collect statistics over time interval
      std::vector<std::chrono::microseconds> pktsSentTimestamp;
      std::vector<std::chrono::microseconds> pktsReceivedTimestamp;

      // Exact member-wise equality over every field of the record
      // (floats are compared with ==, not with a tolerance).
      bool operator==(const entry_ipStat &other) const {
          return std::tie(pkts_received, pkts_sent, kbytes_sent, kbytes_received,
                          interval_pkt_rate, max_pkt_rate, min_pkt_rate, ip_class,
                          firstAppearAsSenderPktCount, firstAppearAsReceiverPktCount,
                          sourceAnomalyScore, destinationAnomalyScore,
                          pktsSentTimestamp, pktsReceivedTimestamp)
                 == std::tie(other.pkts_received, other.pkts_sent, other.kbytes_sent,
                             other.kbytes_received, other.interval_pkt_rate,
                             other.max_pkt_rate, other.min_pkt_rate, other.ip_class,
                             other.firstAppearAsSenderPktCount,
                             other.firstAppearAsReceiverPktCount,
                             other.sourceAnomalyScore, other.destinationAnomalyScore,
                             other.pktsSentTimestamp, other.pktsReceivedTimestamp);
      }
  };
  153. // Aidmar
  /*
   * Statistics for one interval:
   * - pkts_count:         number of packets
   * - kbytes:             data volume (presumably in kbytes, going by the name)
   * - ip_src_entropy:     IP source entropy
   * - ip_dst_entropy:     IP destination entropy
   * - ip_src_cum_entropy: IP source cumulative entropy
   * - ip_dst_cum_entropy: IP destination cumulative entropy
   */
  struct entry_intervalStat {
      int pkts_count;
      float kbytes;
      float ip_src_entropy;
      float ip_dst_entropy;
      float ip_src_cum_entropy;
      float ip_dst_cum_entropy;
      // Predictability score
      //float ip_src_pred_score;
      //float ip_dst_pred_score;

      // Exact member-wise equality (floats are compared with ==).
      bool operator==(const entry_intervalStat &other) const {
          return std::tie(pkts_count, kbytes, ip_src_entropy, ip_dst_entropy,
                          ip_src_cum_entropy, ip_dst_cum_entropy)
                 == std::tie(other.pkts_count, other.kbytes, other.ip_src_entropy,
                             other.ip_dst_entropy, other.ip_src_cum_entropy,
                             other.ip_dst_cum_entropy);
      }
  };
  181. // Aidmar
  /*
   * Statistics kept for one conversation:
   * - pkts_A_B:           number of packets from A to B
   * - pkts_B_A:           number of packets from B to A
   * - pkts_A_B_timestamp: timestamps recorded for the A -> B direction
   * - pkts_B_A_timestamp: timestamps recorded for the B -> A direction
   * - pkts_delay:         recorded delays
   * - avg_delay:          average delay
   */
  struct entry_convStat {
      long pkts_A_B;
      long pkts_B_A;
      std::vector<std::chrono::microseconds> pkts_A_B_timestamp;
      std::vector<std::chrono::microseconds> pkts_B_A_timestamp;
      std::vector<std::chrono::microseconds> pkts_delay;
      //std::chrono::duration<double, std::micro> median_delay;
      std::chrono::microseconds avg_delay;

      // Member-wise equality over every field. pkts_B_A must take part in
      // the comparison: without it, two records that differ only in the
      // B -> A packet counter would wrongly compare equal.
      bool operator==(const entry_convStat &other) const {
          return pkts_A_B == other.pkts_A_B
                 && pkts_B_A == other.pkts_B_A
                 && pkts_A_B_timestamp == other.pkts_A_B_timestamp
                 && pkts_B_A_timestamp == other.pkts_B_A_timestamp
                 && pkts_delay == other.pkts_delay
                 && avg_delay == other.avg_delay;
      }
  };
  /*
   * Key combining:
   * - ipAddress:        IP address (IPv4 or IPv6)
   * - trafficDirection: traffic direction (out: outgoing connection, in: incoming connection)
   * - portNumber:       port number
   */
  struct ipAddress_inOut_port {
      std::string ipAddress;
      std::string trafficDirection;
      int portNumber;

      // Equal only when address, direction and port all match.
      bool operator==(const ipAddress_inOut_port &other) const {
          return std::tie(ipAddress, trafficDirection, portNumber)
                 == std::tie(other.ipAddress, other.trafficDirection, other.portNumber);
      }
  };
  219. /*
  220. * Definition of hash functions for structs used as key in unordered_map
  221. */
  222. namespace std {
  223. template<>
  224. struct hash<ipAddress_ttl> {
  225. std::size_t operator()(const ipAddress_ttl &k) const {
  226. using std::size_t;
  227. using std::hash;
  228. using std::string;
  229. return ((hash<string>()(k.ipAddress)
  230. ^ (hash<int>()(k.ttlValue) << 1)) >> 1);
  231. }
  232. };
  233. // Aidmar
  234. template<>
  235. struct hash<ipAddress_mss> {
  236. std::size_t operator()(const ipAddress_mss &k) const {
  237. using std::size_t;
  238. using std::hash;
  239. using std::string;
  240. return ((hash<string>()(k.ipAddress)
  241. ^ (hash<int>()(k.mssValue) << 1)) >> 1);
  242. }
  243. };
  244. // Aidmar
  245. template<>
  246. struct hash<ipAddress_win> {
  247. std::size_t operator()(const ipAddress_win &k) const {
  248. using std::size_t;
  249. using std::hash;
  250. using std::string;
  251. return ((hash<string>()(k.ipAddress)
  252. ^ (hash<int>()(k.winSize) << 1)) >> 1);
  253. }
  254. };
  255. // Aidmar: TO-DO:??
  256. template<>
  257. struct hash<conv> {
  258. std::size_t operator()(const conv &k) const {
  259. using std::size_t;
  260. using std::hash;
  261. using std::string;
  262. return ((hash<string>()(k.ipAddressA)
  263. ^ (hash<int>()(k.portA) << 1)) >> 1)
  264. ^ ((hash<string>()(k.ipAddressB)
  265. ^ (hash<int>()(k.portB) << 1)) >> 1);
  266. }
  267. };
  268. template<>
  269. struct hash<ipAddress_protocol> {
  270. std::size_t operator()(const ipAddress_protocol &k) const {
  271. using std::size_t;
  272. using std::hash;
  273. using std::string;
  274. return ((hash<string>()(k.ipAddress)
  275. ^ (hash<string>()(k.protocol) << 1)) >> 1);
  276. }
  277. };
  278. template<>
  279. struct hash<ipAddress_inOut_port> {
  280. std::size_t operator()(const ipAddress_inOut_port &k) const {
  281. using std::size_t;
  282. using std::hash;
  283. using std::string;
  284. return ((hash<string>()(k.ipAddress)
  285. ^ (hash<string>()(k.trafficDirection) << 1)) >> 1)
  286. ^ (hash<int>()(k.portNumber) << 1);
  287. }
  288. };
  289. }
  290. class statistics {
  291. public:
  292. /*
  293. * Constructor
  294. */
  295. statistics();
  296. /*
  297. * Methods
  298. */
  299. std::string getFormattedTimestamp(time_t seconds, suseconds_t microseconds) const;
  300. /*
  301. * Access methods for containers
  302. */
  303. void incrementPacketCount();
  304. // Adimar
  305. void calculateIPIntervalPacketRate(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp);
  306. void incrementMSScount(std::string ipAddress, int mssValue);
  307. void incrementWinCount(std::string ipAddress, int winSize);
  308. void addConvStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
  309. std::vector<float> calculateIPsCumEntropy();
  310. std::vector<float> calculateLastIntervalIPsEntropy(std::chrono::microseconds intervalStartTimestamp);
  311. void addIntervalStat(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp, std::chrono::microseconds lastPktTimestamp, int previousPacketCount, float previousSumPacketSize);
  312. void incrementTTLcount(std::string ipAddress, int ttlValue);
  313. void incrementProtocolCount(std::string ipAddress, std::string protocol);
  314. void incrementPortCount(std::string ipAddressSender, int outgoingPort, std::string ipAddressReceiver,
  315. int incomingPort);
  316. int getProtocolCount(std::string ipAddress, std::string protocol);
  317. void setTimestampFirstPacket(Tins::Timestamp ts);
  318. void setTimestampLastPacket(Tins::Timestamp ts);
  319. // Aidmar
  320. Tins::Timestamp getTimestampFirstPacket();
  321. Tins::Timestamp getTimestampLastPacket();
  322. void assignMacAddress(std::string ipAddress, std::string macAddress);
  323. // Aidmar
  324. void addIpStat_packetSent(std::string filePath, std::string ipAddressSender, std::string ipAddressReceiver, long bytesSent, std::chrono::microseconds timestamp);
  325. int getPacketCount();
  326. int getSumPacketSize();
  327. void addMSS(std::string ipAddress, int MSSvalue);
  328. void writeToDatabase(std::string database_path);
  329. void addPacketSize(uint32_t packetSize);
  330. std::string getCaptureDurationTimestamp() const;
  331. float getCaptureDurationSeconds() const;
  332. float getAvgPacketSize() const;
  333. void printStats(std::string ipAddress);
  334. /*
  335. * IP Address-specific statistics
  336. */
  337. ip_stats getStatsForIP(std::string ipAddress);
  338. private:
  339. /*
  340. * Data fields
  341. */
  342. Tins::Timestamp timestamp_firstPacket;
  343. Tins::Timestamp timestamp_lastPacket;
  344. float sumPacketSize = 0;
  345. int packetCount = 0;
  346. /*
  347. * Data containers
  348. */
  349. // {IP Address, TTL value, count}
  350. std::unordered_map<ipAddress_ttl, int> ttl_distribution;
  351. // Aidmar
  352. // {IP Address, MSS value, count}
  353. std::unordered_map<ipAddress_mss, int> mss_distribution;
  354. // {IP Address, Win size, count}
  355. std::unordered_map<ipAddress_win, int> win_distribution;
  356. // {IP Address A, Port A, IP Address B, Port B, #packets_A_B, #packets_B_A}
  357. std::unordered_map<conv, entry_convStat> conv_statistics;
  358. std::unordered_map<std::string, entry_intervalStat> interval_statistics;
  359. // {IP Address, Protocol, count}
  360. std::unordered_map<ipAddress_protocol, int> protocol_distribution;
  361. // {IP Address, #received packets, #sent packets, Data received in kbytes, Data sent in kbytes}
  362. std::unordered_map<std::string, entry_ipStat> ip_statistics;
  363. // {IP Address, in_out, Port Number, count}
  364. std::unordered_map<ipAddress_inOut_port, int> ip_ports;
  365. // {IP Address, MAC Address}
  366. std::unordered_map<std::string, std::string> ip_mac_mapping;
  367. // {IP Address, avg MSS}
  368. std::unordered_map<std::string, int> ip_sumMss;
  369. };
  370. #endif //CPP_PCAPREADER_STATISTICS_H