3
0

statistics.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. /*
  2. * Class providing containers and access methods for statistical data collection.
  3. */
  4. #ifndef CPP_PCAPREADER_STATISTICS_H
  5. #define CPP_PCAPREADER_STATISTICS_H
  6. // Aidmar
  7. #include <vector>
  8. #include <unordered_map>
  9. #include <list>
  10. #include <tuple>
  11. #include <tins/timestamp.h>
  12. #include <tins/ip_address.h>
  13. #include "utilities.h"
  14. using namespace Tins;
  15. /*
  16. * Definition of structs used in unordered_map fields
  17. */
  /*
   * Result record of statistics::getStatsForIP for a single IP address:
   * - bandwidthKBitsIn:     incoming bandwidth in KBits
   * - bandwidthKBitsOut:    outgoing bandwidth in KBits
   * - packetPerSecondIn:    number of incoming packets per second
   * - packetPerSecondOut:   number of outgoing packets per second
   * - AvgPacketSizeSent:    average size of sent packets in kbytes
   * - AvgPacketSizeRecv:    average size of received packets in kbytes
   * - AvgMaxSegmentSizeTCP: average value of the TCP option Maximum Segment Size (MSS)
   *
   * Plain aggregate: no member has a default initializer, so use
   * value-initialization (ip_stats s{};) when zeroed fields are required.
   */
  struct ip_stats {
      // Bandwidth
      float bandwidthKBitsIn;
      float bandwidthKBitsOut;

      // Packet rates
      float packetPerSecondIn;
      float packetPerSecondOut;

      // Average packet sizes
      float AvgPacketSizeSent;
      float AvgPacketSizeRecv;

      // TCP Maximum Segment Size
      long AvgMaxSegmentSizeTCP;
  };
  37. // Aidmar
  /*
   * Key identifying a conversation (conv) between two endpoints:
   * - ipAddressA / portA: IP address and port of endpoint A
   * - ipAddressB / portB: IP address and port of endpoint B
   *
   * Equality is direction-sensitive: {A, B} and {B, A} are different keys.
   */
  struct conv {
      std::string ipAddressA;
      int portA;
      std::string ipAddressB;
      int portB;

      // Two keys match only if all four fields match, position by position.
      bool operator==(const conv &other) const {
          return std::tie(ipAddressA, portA, ipAddressB, portB)
                 == std::tie(other.ipAddressA, other.portA, other.ipAddressB, other.portB);
      }
  };
  57. // Aidmar
  /*
   * Key pairing:
   * - ipAddress: IP address (IPv4 or IPv6)
   * - mssValue:  MSS value
   */
  struct ipAddress_mss {
      std::string ipAddress;
      int mssValue;

      // Keys are equal when both the address and the MSS value agree.
      bool operator==(const ipAddress_mss &other) const {
          if (mssValue != other.mssValue) {
              return false;
          }
          return ipAddress == other.ipAddress;
      }
  };
  71. // Aidmar
  /*
   * Key pairing:
   * - ipAddress: IP address (IPv4 or IPv6)
   * - winSize:   window size
   */
  struct ipAddress_win {
      std::string ipAddress;
      int winSize;

      // Keys are equal when both the address and the window size agree.
      bool operator==(const ipAddress_win &other) const {
          return std::tie(ipAddress, winSize) == std::tie(other.ipAddress, other.winSize);
      }
  };
  /*
   * Key pairing:
   * - ipAddress: IP address (IPv4 or IPv6)
   * - ttlValue:  TTL value
   */
  struct ipAddress_ttl {
      std::string ipAddress;
      int ttlValue;

      // Keys are equal when both the address and the TTL value agree.
      bool operator==(const ipAddress_ttl &other) const {
          const bool sameAddress = (ipAddress == other.ipAddress);
          const bool sameTtl = (ttlValue == other.ttlValue);
          return sameAddress && sameTtl;
      }
  };
  /*
   * Key pairing:
   * - ipAddress: IP address (IPv4 or IPv6)
   * - protocol:  protocol name (e.g. TCP, UDP, IPv4, IPv6)
   */
  struct ipAddress_protocol {
      std::string ipAddress;
      std::string protocol;

      // Keys are equal when both the address and the protocol name agree.
      bool operator==(const ipAddress_protocol &other) const {
          return std::tie(ipAddress, protocol) == std::tie(other.ipAddress, other.protocol);
      }
  };
  /*
   * Per-IP statistics record:
   * - pkts_received / pkts_sent:     number of received / sent packets
   * - kbytes_received / kbytes_sent: data received / sent in kbytes
   * - ip_class:                      class label of the IP address
   * - interval_pkt_rate:             packet-rate values, one float per entry
   * - max_pkt_rate / min_pkt_rate:   largest / smallest packet rate
   * - firstAppearAs*PktCount, sourceAnomalyScore, destinationAnomalyScore:
   *                                  used to calculate the Mahoney anomaly score
   * - pktsSentTimestamp / pktsReceivedTimestamp:
   *                                  timestamps used to collect statistics over
   *                                  a time interval
   */
  struct entry_ipStat {
      long pkts_received;
      long pkts_sent;
      float kbytes_received;
      float kbytes_sent;

      // Aidmar
      std::string ip_class;
      std::vector<float> interval_pkt_rate;
      float max_pkt_rate;
      float min_pkt_rate;

      // Aidmar - to calculate Mahoney anomaly score
      long firstAppearAsSenderPktCount;
      long firstAppearAsReceiverPktCount;
      float sourceAnomalyScore;
      float destinationAnomalyScore;

      // Aidmar - to collect statistics over time interval
      std::vector<std::chrono::microseconds> pktsSentTimestamp;
      std::vector<std::chrono::microseconds> pktsReceivedTimestamp;

      // Field-by-field equality over every member; floats are compared exactly.
      bool operator==(const entry_ipStat &other) const {
          // Cheap scalar members first.
          const bool scalarsMatch =
              std::tie(pkts_received, pkts_sent, kbytes_sent, kbytes_received,
                       max_pkt_rate, min_pkt_rate,
                       firstAppearAsSenderPktCount, firstAppearAsReceiverPktCount,
                       sourceAnomalyScore, destinationAnomalyScore)
              == std::tie(other.pkts_received, other.pkts_sent, other.kbytes_sent,
                          other.kbytes_received, other.max_pkt_rate, other.min_pkt_rate,
                          other.firstAppearAsSenderPktCount,
                          other.firstAppearAsReceiverPktCount,
                          other.sourceAnomalyScore, other.destinationAnomalyScore);
          if (!scalarsMatch) {
              return false;
          }

          // Then the string and the containers.
          return std::tie(ip_class, interval_pkt_rate, pktsSentTimestamp, pktsReceivedTimestamp)
                 == std::tie(other.ip_class, other.interval_pkt_rate,
                             other.pktsSentTimestamp, other.pktsReceivedTimestamp);
      }
  };
  154. // Aidmar
  /*
   * Struct used to represent interval statistics:
   * - pkts_count:               number of packets
   * - kbytes:                   amount of data in kbytes
   * - ip_src_entropy:           IP source entropy
   * - ip_dst_entropy:           IP destination entropy
   * - ip_src_cum_entropy:       IP source cumulative entropy
   * - ip_dst_cum_entropy:       IP destination cumulative entropy
   * - payload_count:            payload counter
   * - incorrect_checksum_count: counter of incorrect checksums
   * - correct_checksum_count:   counter of correct checksums
   * - invalid_tos_count:        counter of invalid ToS values
   * - valid_tos_count:          counter of valid ToS values
   */
  struct entry_intervalStat {
      int pkts_count;
      float kbytes;
      float ip_src_entropy;
      float ip_dst_entropy;
      float ip_src_cum_entropy;
      float ip_dst_cum_entropy;
      int payload_count;
      int incorrect_checksum_count;
      int correct_checksum_count;
      int invalid_tos_count;
      int valid_tos_count;
      // Predictability score
      //float ip_src_pred_score;
      //float ip_dst_pred_score;

      // Field-by-field equality in declaration order. Every member takes part,
      // including correct_checksum_count; floats are compared exactly.
      bool operator==(const entry_intervalStat &other) const {
          return pkts_count == other.pkts_count
                 && kbytes == other.kbytes
                 && ip_src_entropy == other.ip_src_entropy
                 && ip_dst_entropy == other.ip_dst_entropy
                 && ip_src_cum_entropy == other.ip_src_cum_entropy
                 && ip_dst_cum_entropy == other.ip_dst_cum_entropy
                 && payload_count == other.payload_count
                 && incorrect_checksum_count == other.incorrect_checksum_count
                 && correct_checksum_count == other.correct_checksum_count
                 && invalid_tos_count == other.invalid_tos_count
                 && valid_tos_count == other.valid_tos_count;
      }
  };
  191. // Aidmar
  /*
   * Struct used to represent the statistics of one conversation:
   * - pkts_A_B:           number of packets from A to B
   * - pkts_B_A:           number of packets from B to A
   * - pkts_A_B_timestamp: timestamps recorded for the A -> B direction
   * - pkts_B_A_timestamp: timestamps recorded for the B -> A direction
   * - pkts_delay:         delay values, stored as microseconds
   * - avg_delay:          average delay, stored as microseconds
   */
  struct entry_convStat {
      long pkts_A_B;
      long pkts_B_A;
      std::vector<std::chrono::microseconds> pkts_A_B_timestamp;
      std::vector<std::chrono::microseconds> pkts_B_A_timestamp;
      std::vector<std::chrono::microseconds> pkts_delay;
      //std::chrono::duration<double, std::micro> median_delay;
      std::chrono::microseconds avg_delay;

      // Field-by-field equality in declaration order. Both packet counters
      // take part, so records differing only in pkts_B_A are not equal.
      bool operator==(const entry_convStat &other) const {
          return pkts_A_B == other.pkts_A_B
                 && pkts_B_A == other.pkts_B_A
                 && pkts_A_B_timestamp == other.pkts_A_B_timestamp
                 && pkts_B_A_timestamp == other.pkts_B_A_timestamp
                 && pkts_delay == other.pkts_delay
                 && avg_delay == other.avg_delay;
      }
  };
  /*
   * Key combining:
   * - ipAddress:        IP address (IPv4 or IPv6)
   * - trafficDirection: traffic direction (out: outgoing connection, in: incoming connection)
   * - portNumber:       port number
   */
  struct ipAddress_inOut_port {
      std::string ipAddress;
      std::string trafficDirection;
      int portNumber;

      // Keys are equal only when address, direction and port all agree.
      bool operator==(const ipAddress_inOut_port &other) const {
          if (portNumber != other.portNumber) {
              return false;
          }
          if (trafficDirection != other.trafficDirection) {
              return false;
          }
          return ipAddress == other.ipAddress;
      }
  };
  229. /*
  230. * Definition of hash functions for structs used as key in unordered_map
  231. */
  232. namespace std {
  233. template<>
  234. struct hash<ipAddress_ttl> {
  235. std::size_t operator()(const ipAddress_ttl &k) const {
  236. using std::size_t;
  237. using std::hash;
  238. using std::string;
  239. return ((hash<string>()(k.ipAddress)
  240. ^ (hash<int>()(k.ttlValue) << 1)) >> 1);
  241. }
  242. };
  243. // Aidmar
  244. template<>
  245. struct hash<ipAddress_mss> {
  246. std::size_t operator()(const ipAddress_mss &k) const {
  247. using std::size_t;
  248. using std::hash;
  249. using std::string;
  250. return ((hash<string>()(k.ipAddress)
  251. ^ (hash<int>()(k.mssValue) << 1)) >> 1);
  252. }
  253. };
  254. // Aidmar
  255. template<>
  256. struct hash<ipAddress_win> {
  257. std::size_t operator()(const ipAddress_win &k) const {
  258. using std::size_t;
  259. using std::hash;
  260. using std::string;
  261. return ((hash<string>()(k.ipAddress)
  262. ^ (hash<int>()(k.winSize) << 1)) >> 1);
  263. }
  264. };
  265. // Aidmar: TO-DO:??
  266. template<>
  267. struct hash<conv> {
  268. std::size_t operator()(const conv &k) const {
  269. using std::size_t;
  270. using std::hash;
  271. using std::string;
  272. return ((hash<string>()(k.ipAddressA)
  273. ^ (hash<int>()(k.portA) << 1)) >> 1)
  274. ^ ((hash<string>()(k.ipAddressB)
  275. ^ (hash<int>()(k.portB) << 1)) >> 1);
  276. }
  277. };
  278. template<>
  279. struct hash<ipAddress_protocol> {
  280. std::size_t operator()(const ipAddress_protocol &k) const {
  281. using std::size_t;
  282. using std::hash;
  283. using std::string;
  284. return ((hash<string>()(k.ipAddress)
  285. ^ (hash<string>()(k.protocol) << 1)) >> 1);
  286. }
  287. };
  288. template<>
  289. struct hash<ipAddress_inOut_port> {
  290. std::size_t operator()(const ipAddress_inOut_port &k) const {
  291. using std::size_t;
  292. using std::hash;
  293. using std::string;
  294. return ((hash<string>()(k.ipAddress)
  295. ^ (hash<string>()(k.trafficDirection) << 1)) >> 1)
  296. ^ (hash<int>()(k.portNumber) << 1);
  297. }
  298. };
  299. }
  300. class statistics {
  301. public:
  302. /*
  303. * Constructor
  304. */
  305. statistics();
  306. /*
  307. * Methods
  308. */
  309. std::string getFormattedTimestamp(time_t seconds, suseconds_t microseconds) const;
  310. /*
  311. * Access methods for containers
  312. */
  313. void incrementPacketCount();
  314. // Adimar
  315. void calculateIPIntervalPacketRate(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp);
  316. void incrementMSScount(std::string ipAddress, int mssValue);
  317. void incrementWinCount(std::string ipAddress, int winSize);
  318. void addConvStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
  319. std::vector<float> calculateIPsCumEntropy();
  320. std::vector<float> calculateLastIntervalIPsEntropy(std::chrono::microseconds intervalStartTimestamp);
  321. void addIntervalStat(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp, std::chrono::microseconds lastPktTimestamp, int previousPacketCount, float previousSumPacketSize);
  322. void checkPayload(const PDU *pdu_l4);
  323. void checkTCPChecksum(std::string ipAddressSender, std::string ipAddressReceiver, TCP tcpPkt);
  324. void checkToS(uint8_t ToS);
  325. void incrementTTLcount(std::string ipAddress, int ttlValue);
  326. void incrementProtocolCount(std::string ipAddress, std::string protocol);
  327. void incrementPortCount(std::string ipAddressSender, int outgoingPort, std::string ipAddressReceiver,
  328. int incomingPort);
  329. int getProtocolCount(std::string ipAddress, std::string protocol);
  330. void setTimestampFirstPacket(Tins::Timestamp ts);
  331. void setTimestampLastPacket(Tins::Timestamp ts);
  332. // Aidmar
  333. Tins::Timestamp getTimestampFirstPacket();
  334. Tins::Timestamp getTimestampLastPacket();
  335. void assignMacAddress(std::string ipAddress, std::string macAddress);
  336. // Aidmar
  337. void addIpStat_packetSent(std::string filePath, std::string ipAddressSender, std::string ipAddressReceiver, long bytesSent, std::chrono::microseconds timestamp);
  338. int getPacketCount();
  339. int getSumPacketSize();
  340. void addMSS(std::string ipAddress, int MSSvalue);
  341. void writeToDatabase(std::string database_path);
  342. void addPacketSize(uint32_t packetSize);
  343. std::string getCaptureDurationTimestamp() const;
  344. float getCaptureDurationSeconds() const;
  345. float getAvgPacketSize() const;
  346. void printStats(std::string ipAddress);
  347. /*
  348. * IP Address-specific statistics
  349. */
  350. ip_stats getStatsForIP(std::string ipAddress);
  351. // Aidmar
  352. bool getDoTests();
  353. void setDoTests(bool var);
  354. // TO-DO: move to private section
  355. std::unordered_map<int, int> dscp_distribution;
  356. private:
  357. /*
  358. * Data fields
  359. */
  360. Tins::Timestamp timestamp_firstPacket;
  361. Tins::Timestamp timestamp_lastPacket;
  362. float sumPacketSize = 0;
  363. int packetCount = 0;
  364. // Aidmar
  365. bool doTests = false;
  366. int payloadCount = 0;
  367. int incorrectTCPChecksumCount = 0;
  368. int correctTCPChecksumCount = 0;
  369. int validToSCount = 0;
  370. int invalidToSCount = 0;
  371. /*
  372. * Data containers
  373. */
  374. // {IP Address, TTL value, count}
  375. std::unordered_map<ipAddress_ttl, int> ttl_distribution;
  376. // Aidmar
  377. // {IP Address, MSS value, count}
  378. std::unordered_map<ipAddress_mss, int> mss_distribution;
  379. // {IP Address, Win size, count}
  380. std::unordered_map<ipAddress_win, int> win_distribution;
  381. // {IP Address A, Port A, IP Address B, Port B, #packets_A_B, #packets_B_A}
  382. std::unordered_map<conv, entry_convStat> conv_statistics;
  383. std::unordered_map<std::string, entry_intervalStat> interval_statistics;
  384. // {IP Address, Protocol, count}
  385. std::unordered_map<ipAddress_protocol, int> protocol_distribution;
  386. // {IP Address, #received packets, #sent packets, Data received in kbytes, Data sent in kbytes}
  387. std::unordered_map<std::string, entry_ipStat> ip_statistics;
  388. // {IP Address, in_out, Port Number, count}
  389. std::unordered_map<ipAddress_inOut_port, int> ip_ports;
  390. // {IP Address, MAC Address}
  391. std::unordered_map<std::string, std::string> ip_mac_mapping;
  392. // {IP Address, avg MSS}
  393. std::unordered_map<std::string, int> ip_sumMss;
  394. };
  395. #endif //CPP_PCAPREADER_STATISTICS_H